devtools::install_github("sysbiomed/glmSparseNet", ref = "prognostic.index")
## Skipping install of 'glmSparseNet' from a github remote, the SHA1 (67cd612b) has not changed since last install.
## Use `force = TRUE` to force installation
library(SummarizedExperiment)
library(dplyr)
library(DT)
library(tidyverse)
library(futile.logger)
library(loose.rock)
library(survival)
library(GGally)
library(ggplot2)
library(survminer)
library(glmnet)
library(glmSparseNet)
library(gplots)
library(readr)
library(maftools)
library(RTCGAToolbox)
library(edgeR)
#install.packages("biospear")
library(biospear)
twiner_glmnet <- function (x,y,alpha, w) {
cv.glmnet(as.matrix(x),as.matrix(y),
family = 'cox',foldid = my_foldid, penalty.factor = w,
alpha = alpha,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= 10)
}
twiner_coefs <- function (cv.fit){
coef(cv.fit, s = 'lambda.min')[,1] %>% { .[. != 0]}
}
twiner_genes <- function (cv.fit) {
b <- which(cv.fit$glmnet.fit$beta[,which(cv.fit$cvm == min(cv.fit$cvm))] != 0)
return (names(b))
}
train <- function (coefs) {
separate2GroupsCox(as.vector(coefs),
xdata.train[, names(coefs)],
ydata.train,
plot.title = 'Train Set TCox', legend.outside = FALSE)
}
test <- function (coefs) {
separate2GroupsCox(as.vector(coefs),
xdata.test[, names(coefs)],
ydata.test,
plot.title = 'Test Set TCox', legend.outside = FALSE)
}
all <- function (coefs) {
separate2GroupsCox(as.vector(coefs),
xdata[, names(coefs)],
ydata,
plot.title = 'all Set TCox', legend.outside = FALSE)
}
Download COAD dataset
coadData <- getFirehoseData(dataset="COAD", runDate="20150402",
forceDownload=TRUE, clinical=TRUE, RNASeq2GeneNorm=TRUE)
rnaseq1 <- coadData@RNASeq2GeneNorm[[2]]
rnaseq1 <- rnaseq1@DataMatrix
rnaseq1 <- t(rnaseq1)
rnaseq1 <- as.data.frame(rnaseq1)
rnaseq1$names <- rownames(rnaseq1)
dim(rnaseq1) #328
## [1] 328 20502
#keep only tumor samples
tumor <- rnaseq1 %>%
filter(str_detect(names, "01A"))
dim(tumor) #287
## [1] 282 20502
tumor$names <- str_sub(tumor$names, 1, str_length(tumor$names)-16) #put names in the right format
#remove duplicated rows
tumor <- tumor[!duplicated(tumor$names), ]
rownames(tumor) <- tumor$names
tumor <- tumor[,-20502]
normal <- rnaseq1 %>%
filter(!str_detect(names, "01A"))
dim(normal) #287
## [1] 46 20502
normal$names <- str_sub(normal$names, 1, str_length(normal$names)-16) #put names in the right format
normal <- normal[!duplicated(normal$names), ]
rownames(normal) <- normal$names
normal <- normal[,-20502]
Download READ dataset
readData <- getFirehoseData(dataset="READ", runDate="20150402",
forceDownload=TRUE, clinical=TRUE, RNASeq2GeneNorm=TRUE)
rnaseq2 <- readData@RNASeq2GeneNorm[[2]]
rnaseq2 <- rnaseq2@DataMatrix
rnaseq2 <- t(rnaseq2)
rnaseq2 <- as.data.frame(rnaseq2)
rnaseq2$names <- rownames(rnaseq2)
dim(rnaseq2) #328
## [1] 105 20502
#keep only tumor samples
tumorread <- rnaseq2 %>%
filter(str_detect(names, "01A"))
dim(tumorread) #287
## [1] 91 20502
tumorread$names <- str_sub(tumorread$names, 1, str_length(tumorread$names)-16) #put names in the right format
#remove duplicated rows
tumorread <- tumorread[!duplicated(tumorread$names), ]
rownames(tumorread) <- tumorread$names
tumorread <- tumorread[,-20502]
normalread <- rnaseq2 %>%
filter(!str_detect(names, "01A"))
dim(normalread) #287
## [1] 14 20502
normalread$names <- str_sub(normalread$names, 1, str_length(normalread$names)-16) #put names in the right format
normalread <- normalread[!duplicated(normalread$names), ]
rownames(normalread) <- normalread$names
normalread <- normalread[,-20502]
Colorectal dataset
dim(tumor)
## [1] 282 20501
dim(normal)
## [1] 46 20501
dim(tumorread)
## [1] 91 20501
dim(normalread)
## [1] 14 20501
tumorf <- rbind(tumor,tumorread)
dim(tumorf)
## [1] 373 20501
normalf <- rbind(normal, normalread)
dim(normalf)
## [1] 60 20501
# write.table(tumorf, "crcRNAseq.txt")
# write.table(normalf, "normalRNAseq.txt")
#write.table(datasurvf, "datasurv.txt")
COAD
clinic <- coadData@clinical
clinic.data <- clinic[,3:5]
clinic.data$days_to_death[is.na(clinic.data$days_to_death)] <- 0
for (i in 1:nrow(clinic.data)){
if (clinic.data$days_to_death[i] != 0){
clinic.data$days_to_last_followup[i] <- 0
}
datasurv <- apply(as.matrix(clinic.data),2,as.numeric)
datasurv <- as.data.frame(datasurv)
datasurv$time <- datasurv$days_to_death + datasurv$days_to_last_followup
datasurv <- datasurv[,-c(2,3)]
rownames(datasurv) <- rownames(clinic.data)
datasurv <- datasurv[datasurv$time > 0, ]
datasurv <- na.omit(datasurv)
colnames(datasurv) <- c("status","time")
datasurv$names <- rownames(datasurv)
datasurv$names <- toupper(datasurv$names)
datasurv$names <- chartr('.', '-', datasurv$names)
rownames(datasurv) <- datasurv$names
datasurv <- datasurv[,-3]
}
READ
clinicread <- readData@clinical
clinicread.data <- clinicread[,3:5]
clinicread.data$days_to_death[is.na(clinicread.data$days_to_death)] <- 0
for (i in 1:nrow(clinicread.data)){
if (clinicread.data$days_to_death[i] != 0){
clinicread.data$days_to_last_followup[i] <- 0
}
datasurvread <- apply(as.matrix(clinicread.data),2,as.numeric)
datasurvread <- as.data.frame(datasurvread)
datasurvread$time <- datasurvread$days_to_death + datasurvread$days_to_last_followup
datasurvread <- datasurvread[,-c(2,3)]
rownames(datasurvread) <- rownames(clinicread.data)
datasurvread <- datasurvread[datasurvread$time > 0, ]
datasurvread <- na.omit(datasurvread)
colnames(datasurvread) <- c("status","time")
datasurvread$names <- rownames(datasurvread)
datasurvread$names <- toupper(datasurvread$names)
datasurvread$names <- chartr('.', '-', datasurvread$names)
rownames(datasurvread) <- datasurvread$names
datasurvread <- datasurvread[,-3]
}
Colorectal
dim(datasurv)
## [1] 436 2
dim(datasurvread)
## [1] 159 2
datasurvf <- rbind(datasurv,datasurvread)
dim(datasurvf)
## [1] 595 2
clinicalf <- rbind(clinic,clinicread)
xcoad_sd <- tumorf [,sapply(seq(ncol(tumorf)), function(ix) {sd(tumorf[,ix])}) != 0] # 328 20025
xnormal_sd <- normalf[,sapply(seq(ncol(normalf)), function(ix) {sd(normalf[,ix])}) != 0] #345 48710
xcoad_less <- xcoad_sd[,which(colnames(xcoad_sd) %in% colnames(xnormal_sd))]
xnormal_less <- xnormal_sd[,which(colnames(xnormal_sd) %in% colnames(xcoad_sd))]
# normalizing data
xcoad_norm <- scale(log2(xcoad_less+1))
xnorm_norm <- scale(log2(xnormal_less+1))
## correlation matrices
library("propagate")
library("lsa")
options(fftempdir = "~/")
xtumoral_cor <- bigcor(xcoad_norm, y = NULL, fun = "cor", size = 2000, verbose=FALSE)
xtumoral_cor <- as.data.frame(as.ffdf(xtumoral_cor))
xnormal_cor <- bigcor(xnorm_norm, y = NULL, fun = "cor", size = 2000, verbose=FALSE)
xnormal_cor <- as.data.frame(as.ffdf(xnormal_cor))
# angular distance
ang_weight <- vector()
for (i in 1:dim(xcoad_norm)[2]){
ang_weight[i] <- acos(cosine(xtumoral_cor[,i],xnormal_cor[,i]))/pi
}
##normalized weights (between 0 and 1)
tumornormal_weights <- ang_weight / max(ang_weight)
hist(ang_weight)
hist(tumornormal_weights,main="normal weight")
#1-w
tumornormal_weights1 <- 1 - tumornormal_weights
hist(tumornormal_weights1,main="1 - normal weight")
#1-w^3
tumornormal_weights2 <- 1 - (tumornormal_weights^3)
hist(tumornormal_weights2,main="1 - normal weight^3")
#(1-w)^3
tumornormal_weights3 <- (1 - tumornormal_weights)^3
hist(tumornormal_weights3,main="(1 - normal weight)^3")
#exp((1-w)^3)
tumornormal_weights4 <- exp((1 - tumornormal_weights)^3)
hist(tumornormal_weights4,main="exp((1 - normal weight)^3)")
#exp(-w^3)
tumornormal_weights5 <- exp(- tumornormal_weights^3)
hist(tumornormal_weights5,main="exp(- normal weight^3)")
#1/w
tumornormal_weights6 <- 1 / tumornormal_weights
hist(tumornormal_weights6,main="1 / normal weight")
xdata.raw <- as.data.frame(xcoad_less) #255 19525
ydata.raw <- as.data.frame(datasurvf)
ydata.raw$rownames <- rownames(ydata.raw)
ydata.raw$rownames <- chartr('.', '-', ydata.raw$rownames)
rownames(ydata.raw) <- ydata.raw$rownames
ydata <- ydata.raw[rownames(ydata.raw) %in%
rownames(xdata.raw),]
xdata.raw <- xdata.raw[rownames(xdata.raw) %in%
rownames(ydata),]
xdata <- xdata.raw %>%
{ (apply(., 2, sd) != 0) } %>%
{ xdata.raw[, .] } %>%
scale
xdata <- as.matrix(xdata) #293 17910
ydata <- ydata[,1:2]
dim(xdata)
## [1] 357 19630
Test / Training sets
set.seed(params$seed)
ixs <- loose.rock::balanced.train.and.test(which(as.logical(ydata$status)), which(as.logical(!ydata$status)), train.perc = params$train)
xdata.test <- xdata[ixs$test,]
ydata.test <- ydata[ixs$test,]
#
xdata.train <- xdata[ixs$train,]
ydata.train <- ydata[ixs$train,]
xdata.train.digest <- loose.rock::digest.cache(xdata.train)
flog.info('Size of sets: (size/events)\n * Training set: % 4d/% 4d\n * Test set: % 4d/% 4d',
nrow(xdata.train),
sum(ydata.train$status),
nrow(xdata.test),
sum(ydata.test$status))
## INFO [2020-10-23 09:09:36] Size of sets: (size/events)
## * Training set: 249/ 50
## * Test set: 108/ 22
# * Training set: 204/ 50
# * Test set: 89/ 22
#Foldid
my_foldid <- sample(1:10,size=dim(ydata.train)[1],replace=TRUE)
#Remove columns with standard deviation of zero
xdata.train <- xdata.train[, ! apply(xdata.train , 2 , function(x) sd(x, na.rm = TRUE)==0 ) ]
xdata.test <- xdata.test[, ! apply(xdata.test , 2 , function(x) sd(x, na.rm = TRUE)==0 ) ]
dim(xdata.test)
## [1] 108 19572
dim(xdata.train)
## [1] 249 19618
xdata.train <- xdata.train[,colnames(xdata.train) %in%
colnames(xdata.test)]
xdata.test <- xdata.test[,colnames(xdata.test) %in%
colnames(xdata.train)]
dim(xdata.test)
## [1] 108 19560
dim(xdata.train)
## [1] 249 19560
xdata.train[1:3,1:3]
## A1BG A1CF A2BP1
## TCGA-3L-AA1B -0.09037123 0.50178233 -0.29147700
## TCGA-4N-A93T 6.56494274 -0.65736676 0.03499383
## TCGA-4T-AA8H -0.14165161 0.04934209 -0.36260403
#glmnet
cv.fit_twii3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights6)
#genes
coefs.v_twii3 <- twiner_coefs(cv.fit_twii3)
fit.selected_twii3 <- twiner_genes(cv.fit_twii3)
length(fit.selected_twii3)
## [1] 10
fit.selected_twii3
## [1] "C10orf114" "CARKD" "FAM159A" "IFT74" "LOC285780" "LOC646498"
## [7] "OSBPL3" "PCDHB12" "PRCD" "TRIM67"
#train
train_twii03 <- train (coefs.v_twii3)
train_twii03$pvalue ## these values are the ones in Table 1
## [1] 0.002401583
#test
test_twii03 <- test (coefs.v_twii3)
test_twii03$pvalue
## [1] 0.07566096
#all
all_twii03 <- all (coefs.v_twii3)
#glmnet
cv.fit_twii2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights6)
#genes
coefs.v_twii2 <- twiner_coefs(cv.fit_twii2)
fit.selected_twii2 <- twiner_genes(cv.fit_twii2)
length(fit.selected_twii2)
## [1] 11
fit.selected_twii2
## [1] "C10orf114" "CARKD" "FAM159A" "IFT74" "LOC285780" "LOC646498"
## [7] "OSBPL3" "PAX5" "PCDHB12" "PRCD" "TRIM67"
#train
train_twii02 <- train (coefs.v_twii2)
train_twii02$pvalue
## [1] 0.0005882518
#test
test_twii02 <- test (coefs.v_twii2)
test_twii02$pvalue
## [1] 0.06653314
#all
all_twii02 <- all (coefs.v_twii2)
#glmnet
cv.fit_twii1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights6)
#genes
coefs.v_twii1 <- twiner_coefs(cv.fit_twii1)
fit.selected_twii1 <- twiner_genes(cv.fit_twii1)
length(fit.selected_twii1)
## [1] 53
fit.selected_twii1
## [1] "ANKRD26P1" "AOX2P" "ASPHD1" "C10orf114" "C20orf106"
## [6] "CAPN7" "CARKD" "CLDN9" "COL19A1" "COX4I2"
## [11] "CUZD1" "CYP27C1" "CYP7A1" "DCLK3" "DCP1A"
## [16] "DPPA2" "EEPD1" "FAM138B" "FAM159A" "FCRL2"
## [21] "FLJ16779" "GRAPL" "HPCAL1" "IFT74" "IGLON5"
## [26] "LBX2" "LOC100303728" "LOC285074" "LOC285780" "LOC646498"
## [31] "LOC732275" "MEIG1" "NKAIN4" "NXF2B" "OR2T5"
## [36] "OSBPL3" "OSTN" "PAX5" "PCDHA7" "PCDHB12"
## [41] "PGAM2" "PRCD" "RAB20" "SEPT7P2" "SNTG1"
## [46] "TAC3" "TERF2IP" "TRIM67" "TXNL4B" "WBSCR27"
## [51] "ZDHHC19" "ZNF607" "ZNF883"
#train
train_twii01 <- train (coefs.v_twii1)
train_twii01$pvalue
## [1] 2.664447e-09
#test
test_twii01 <- test (coefs.v_twii1)
test_twii01$pvalue
## [1] 0.01944886
#all
all_twii01 <- all (coefs.v_twii1)
#glmnet
cv.fit_twii05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights6)
#genes
coefs.v_twii05 <- twiner_coefs(cv.fit_twii05)
fit.selected_twii05 <- twiner_genes(cv.fit_twii05)
length(fit.selected_twii05)
## [1] 96
#train
train_twii05 <- train (coefs.v_twii05)
#test
test_twii05 <- test (coefs.v_twii05)
#all
all_twii05 <- all (coefs.v_twii05)
alpha_points<-c(0.3, 0.2,0.1,0.05)
pvalues_twii_values_train<-c(train_twii03$pvalue,train_twii02$pvalue,train_twii01$pvalue,train_twii05$pvalue)
pvalues_twii_values_test<-c(test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue)
pvalues_twii_values_all<-c(all_twii03$pvalue,all_twii02$pvalue,all_twii01$pvalue,all_twii05$pvalue)
pvalues_twii_values<-c(train_twii03$pvalue,train_twii02$pvalue,train_twii01$pvalue,train_twii05$pvalue,test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue,all_twii03$pvalue,all_twii02$pvalue,all_twii01$pvalue,all_twii05$pvalue)
pvalues_twii_method<-c(rep("pvalues TCox (1/w) train set",4),rep("pvalues TCox (1/w) test set",4),rep("pvalues TCox (1/w) all set",4))
pvalues_twii<-data.frame(alpha_points,pvalues_twii_values,pvalues_twii_method)
p_pvalues_twii<-ggplot(data = pvalues_twii, aes(x = alpha_points, y = pvalues_twii_values,color= pvalues_twii_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twii+ ylim(0, max(pvalues_twii_values))
#glmnet
cv.fit_twexp23 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights5)
#genes
coefs.v_twexp23 <- twiner_coefs(cv.fit_twexp23)
fit.selected_twexp23 <- twiner_genes(cv.fit_twexp23)
length(fit.selected_twexp23)
## [1] 12
#train
train_twexp203 <- train (coefs.v_twexp23)
train_twexp203$pvalue ## these values are the ones in Table 1
## [1] 0.0006276918
#test
test_twexp203 <- test (coefs.v_twexp23)
test_twexp203$pvalue
## [1] 0.4244
#all
all_twexp203 <- all (coefs.v_twexp23)
#glmnet
cv.fit_twexp22 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights5)
#genes
coefs.v_twexp22 <- twiner_coefs(cv.fit_twexp22)
fit.selected_twexp22 <- twiner_genes(cv.fit_twexp22)
length(fit.selected_twexp22)
## [1] 18
#train
train_twexp202 <- train (coefs.v_twexp22)
#test
test_twexp202 <- test (coefs.v_twexp22)
#all
all_twexp202 <- all (coefs.v_twexp22)
#glmnet
cv.fit_twexp21 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights5)
#genes
coefs.v_twexp21 <- twiner_coefs(cv.fit_twexp21)
fit.selected_twexp21 <- twiner_genes(cv.fit_twexp21)
length(fit.selected_twexp21)
## [1] 44
#train
train_twexp201 <- train (coefs.v_twexp21)
#test
test_twexp201 <- test (coefs.v_twexp21)
#all
all_twexp201 <- all (coefs.v_twexp21)
#glmnet
cv.fit_twexp205 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights5)
#genes
coefs.v_twexp205 <- twiner_coefs(cv.fit_twexp205)
fit.selected_twexp205 <- twiner_genes(cv.fit_twexp205)
length(fit.selected_twexp205)
## [1] 72
#train
train_twexp205 <- train (coefs.v_twexp205)
#test
test_twexp205 <- test (coefs.v_twexp205)
#all
all_twexp205 <- all (coefs.v_twexp205)
alpha_points<-c(0.3, 0.2,0.1,0.05)
pvalues_twexp2_values_train<-c(train_twexp203$pvalue,train_twexp202$pvalue,train_twexp201$pvalue,train_twexp205$pvalue)
pvalues_twexp2_values_test<-c(test_twexp203$pvalue,test_twexp202$pvalue,test_twexp201$pvalue,test_twexp205$pvalue)
pvalues_twexp2_values_all<-c(all_twexp203$pvalue,all_twexp202$pvalue,all_twexp201$pvalue,all_twexp205$pvalue)
pvalues_twexp2_values<-c(train_twexp203$pvalue,train_twexp202$pvalue,train_twexp201$pvalue,train_twexp205$pvalue,test_twexp203$pvalue,test_twexp202$pvalue,test_twexp201$pvalue,test_twexp205$pvalue,all_twexp203$pvalue,all_twexp202$pvalue,all_twexp201$pvalue,all_twexp205$pvalue)
pvalues_twexp2_method<-c(rep("pvalues TCox exp(-w^3) train set",4),rep("pvalues TCox exp(-w^3) test set",4),rep("pvalues TCox exp(-w^3) all set",4))
pvalues_twexp2<-data.frame(alpha_points,pvalues_twexp2_values,pvalues_twexp2_method)
p_pvalues_twexp2<-ggplot(data = pvalues_twexp2, aes(x = alpha_points, y = pvalues_twexp2_values,color= pvalues_twexp2_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twexp2+ ylim(0, max(pvalues_twexp2_values))
#glmnet
cv.fit_tw1i3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights1)
#genes
coefs.v_tw1i3 <- twiner_coefs(cv.fit_tw1i3)
fit.selected_tw1i3 <- twiner_genes(cv.fit_tw1i3)
length(fit.selected_tw1i3)
## [1] 23
#train
train_tw1i03 <- train (coefs.v_tw1i3)
#test
test_tw1i03 <- test (coefs.v_tw1i3)
#all
all_tw1i03 <- all (coefs.v_tw1i3)
#glmnet
cv.fit_tw1i2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights1)
#genes
coefs.v_tw1i2 <- twiner_coefs(cv.fit_tw1i2)
fit.selected_tw1i2 <- twiner_genes(cv.fit_tw1i2)
length(fit.selected_tw1i2)
## [1] 28
#train
train_tw1i02 <- train (coefs.v_tw1i2)
#test
test_tw1i02 <- test (coefs.v_tw1i2)
#all
all_tw1i02 <- all (coefs.v_tw1i2)
#glmnet
cv.fit_tw1i1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights1)
#genes
coefs.v_tw1i1 <- twiner_coefs(cv.fit_tw1i1)
fit.selected_tw1i1 <- twiner_genes(cv.fit_tw1i1)
length(fit.selected_tw1i1)
## [1] 36
#train
train_tw1i01 <- train (coefs.v_tw1i1)
#test
test_tw1i01 <- test (coefs.v_tw1i1)
#all
all_tw1i01 <- all (coefs.v_tw1i1)
#glmnet
cv.fit_tw1i05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights1)
#genes
coefs.v_tw1i05 <- twiner_coefs(cv.fit_tw1i05)
fit.selected_tw1i05 <- twiner_genes(cv.fit_tw1i05)
length(fit.selected_tw1i05)
## [1] 47
#train
train_tw1i05 <- train (coefs.v_tw1i05)
#test
test_tw1i05 <- test (coefs.v_tw1i05)
#all
all_tw1i05 <- all (coefs.v_tw1i05)
alpha_points<-c(0.3,0.2,0.1,0.05)
pvalues_tw1i_values_train<-c(train_tw1i03$pvalue,train_tw1i02$pvalue,train_tw1i01$pvalue,train_tw1i05$pvalue)
pvalues_tw1i_values_test<-c(test_tw1i03$pvalue,test_tw1i02$pvalue,test_tw1i01$pvalue,test_tw1i05$pvalue)
pvalues_tw1i_values_all<-c(all_tw1i03$pvalue,all_tw1i02$pvalue,all_tw1i01$pvalue,all_tw1i05$pvalue)
pvalues_tw1i_values<-c(train_tw1i03$pvalue,train_tw1i02$pvalue,train_tw1i01$pvalue,train_tw1i05$pvalue,test_tw1i03$pvalue,test_tw1i02$pvalue,test_tw1i01$pvalue,test_tw1i05$pvalue,all_tw1i03$pvalue,all_tw1i02$pvalue,all_tw1i01$pvalue,all_tw1i05$pvalue)
pvalues_tw1i_method<-c(rep("pvalues TCox (1-w) train set",4),rep("pvalues TCox (1-w) test set",4),rep("pvalues TCox (1-w) all set",4))
pvalues_tw1i<-data.frame(alpha_points,pvalues_tw1i_values,pvalues_tw1i_method)
p_pvalues_tw1i<-ggplot(data = pvalues_tw1i, aes(x = alpha_points, y = pvalues_tw1i_values,color= pvalues_tw1i_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_tw1i+ ylim(0, max(pvalues_tw1i_values))
#glmnet
cv.fit_twexp3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights4)
#genes
coefs.v_twexp3 <- twiner_coefs(cv.fit_twexp3)
fit.selected_twexp3 <- twiner_genes(cv.fit_twexp3)
length(fit.selected_twexp3)
## [1] 21
#train
train_twexp03 <- train (coefs.v_twexp3)
train_twexp03$pvalue ## these values are the ones in Table 1
## [1] 7.367639e-07
#test
test_twexp03 <- test (coefs.v_twexp3)
test_twexp03$pvalue
## [1] 0.1141262
#all
all_twexp03 <- all (coefs.v_twexp3)
#glmnet
cv.fit_twexp2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights4)
#genes
coefs.v_twexp2 <- twiner_coefs(cv.fit_twexp2)
fit.selected_twexp2 <- twiner_genes(cv.fit_twexp2)
length(fit.selected_twexp2)
## [1] 24
#train
train_twexp02 <- train (coefs.v_twexp2)
#test
test_twexp02 <- test (coefs.v_twexp2)
#all
all_twexp02 <- all (coefs.v_twexp2)
#glmnet
cv.fit_twexp1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights4)
#genes
coefs.v_twexp1 <- twiner_coefs(cv.fit_twexp1)
fit.selected_twexp1 <- twiner_genes(cv.fit_twexp1)
length(fit.selected_twexp1)
## [1] 98
#train
train_twexp01 <- train (coefs.v_twexp1)
#test
test_twexp01 <- test (coefs.v_twexp1)
#all
all_twexp01 <- all (coefs.v_twexp1)
#glmnet
cv.fit_twexp05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights4)
#genes
coefs.v_twexp05 <- twiner_coefs(cv.fit_twexp05)
fit.selected_twexp05 <- twiner_genes(cv.fit_twexp05)
length(fit.selected_twexp05)
## [1] 159
#train
train_twexp05 <- train (coefs.v_twexp05)
#test
test_twexp05 <- test (coefs.v_twexp05)
#all
all_twexp05 <- all (coefs.v_twexp05)
alpha_points<-c(0.3, 0.2,0.1,0.05)
pvalues_twexp_values_train<-c(train_twexp03$pvalue,train_twexp02$pvalue,train_twexp01$pvalue,train_twexp05$pvalue)
pvalues_twexp_values_test<-c(test_twexp03$pvalue,test_twexp02$pvalue,test_twexp01$pvalue,test_twexp05$pvalue)
pvalues_twexp_values_all<-c(all_twexp03$pvalue,all_twexp02$pvalue,all_twexp01$pvalue,all_twexp05$pvalue)
pvalues_twexp_values<-c(train_twexp03$pvalue,train_twexp02$pvalue,train_twexp01$pvalue,train_twexp05$pvalue,test_twexp03$pvalue,test_twexp02$pvalue,test_twexp01$pvalue,test_twexp05$pvalue,all_twexp03$pvalue,all_twexp02$pvalue,all_twexp01$pvalue,all_twexp05$pvalue)
pvalues_twexp_method<-c(rep("pvalues TCox exp((1-w)^3) train set",4),rep("pvalues TCox exp((1-w)^3) test set",4),rep("pvalues TCox exp((1-w)^3) all set",4))
pvalues_twexp<-data.frame(alpha_points,pvalues_twexp_values,pvalues_twexp_method)
p_pvalues_twexp<-ggplot(data = pvalues_twexp, aes(x = alpha_points, y = pvalues_twexp_values,color= pvalues_twexp_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twexp+ ylim(0, max(pvalues_twexp_values))
#glmnet
cv.fit_twele3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights2)
#genes
coefs.v_twele3 <- twiner_coefs(cv.fit_twele3)
fit.selected_twele3 <- twiner_genes(cv.fit_twele3)
length(fit.selected_twele3)
## [1] 21
#train
train_twele03 <- train (coefs.v_twele3)
train_twele03$pvalue ## these values are the ones in Table 1
## [1] 3.783471e-07
#test
test_twele03 <- test (coefs.v_twele3)
test_twele03$pvalue
## [1] 0.06125732
#all
all_twele03 <- all (coefs.v_twele3)
#glmnet
cv.fit_twele2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights2)
#genes
coefs.v_twele2 <- twiner_coefs(cv.fit_twele2)
fit.selected_twele2 <- twiner_genes(cv.fit_twele2)
length(fit.selected_twele2)
## [1] 21
#train
train_twele02 <- train (coefs.v_twele2)
#test
test_twele02 <- test (coefs.v_twele2)
#all
all_twele02 <- all (coefs.v_twele2)
#glmnet
cv.fit_twele1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights2)
#genes
coefs.v_twele1 <- twiner_coefs(cv.fit_twele1)
fit.selected_twele1 <- twiner_genes(cv.fit_twele1)
length(fit.selected_twele1)
## [1] 33
#train
train_twele01 <- train (coefs.v_twele1)
#test
test_twele01 <- test (coefs.v_twele1)
#all
all_twele01 <- all (coefs.v_twele1)
#glmnet
cv.fit_twele05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights2)
#genes
coefs.v_twele05 <- twiner_coefs(cv.fit_twele05)
fit.selected_twele05 <- twiner_genes(cv.fit_twele05)
length(fit.selected_twele05)
## [1] 46
#train
train_twele05 <- train (coefs.v_twele05)
#test
test_twele05 <- test (coefs.v_twele05)
#all
all_twele05 <- all (coefs.v_twele05)
alpha_points<-c(0.3, 0.2,0.1,0.05)
pvalues_twele_values_train<-c(train_twele03$pvalue,train_twele02$pvalue,train_twele01$pvalue,train_twele05$pvalue)
pvalues_twele_values_test<-c(test_twele03$pvalue,test_twele02$pvalue,test_twele01$pvalue,test_twele05$pvalue)
pvalues_twele_values_all<-c(all_twele03$pvalue,all_twele02$pvalue,all_twele01$pvalue,all_twele05$pvalue)
pvalues_twele_values<-c(train_twele03$pvalue,train_twele02$pvalue,train_twele01$pvalue,train_twele05$pvalue,test_twele03$pvalue,test_twele02$pvalue,test_twele01$pvalue,test_twele05$pvalue,all_twele03$pvalue,all_twele02$pvalue,all_twele01$pvalue,all_twele05$pvalue)
pvalues_twele_method<-c(rep("pvalues TCox (1-w^3) train set",4),rep("pvalues TCox (1-w^3) test set",4),rep("pvalues TCox (1-w^3) all set",4))
pvalues_twele<-data.frame(alpha_points,pvalues_twele_values,pvalues_twele_method)
p_pvalues_twele<-ggplot(data = pvalues_twele, aes(x = alpha_points, y = pvalues_twele_values,color= pvalues_twele_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twele+ ylim(0, max(pvalues_twele_values))
#glmnet
cv.fit_tweleele3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights3)
#genes
coefs.v_tweleele3 <- twiner_coefs(cv.fit_tweleele3)
fit.selected_tweleele3 <- twiner_genes(cv.fit_tweleele3)
length(fit.selected_tweleele3)
## [1] 21
#train
train_tweleele03 <- train (coefs.v_tweleele3)
train_tweleele03$pvalue ## these values are the ones in Table 1
## [1] 2.406974e-06
#test
test_tweleele03 <- test (coefs.v_tweleele3)
test_tweleele03$pvalue
## [1] 0.534717
#all
all_tweleele03 <- all (coefs.v_tweleele3)
#glmnet
cv.fit_tweleele2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights3)
#genes
coefs.v_tweleele2 <- twiner_coefs(cv.fit_tweleele2)
fit.selected_tweleele2 <- twiner_genes(cv.fit_tweleele2)
length(fit.selected_tweleele2)
## [1] 21
#train
train_tweleele02 <- train (coefs.v_tweleele2)
#test
test_tweleele02 <- test (coefs.v_tweleele2)
#all
all_tweleele02 <- all (coefs.v_tweleele2)
#glmnet
cv.fit_tweleele1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights3)
#genes
coefs.v_tweleele1 <- twiner_coefs(cv.fit_tweleele1)
fit.selected_tweleele1 <- twiner_genes(cv.fit_tweleele1)
length(fit.selected_tweleele1)
## [1] 25
#train
train_tweleele01 <- train (coefs.v_tweleele1)
#test
test_tweleele01 <- test (coefs.v_tweleele1)
#all
all_tweleele01 <- all (coefs.v_tweleele1)
#glmnet
cv.fit_tweleele05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights3)
#genes
coefs.v_tweleele05 <- twiner_coefs(cv.fit_tweleele05)
fit.selected_tweleele05 <- twiner_genes(cv.fit_tweleele05)
length(fit.selected_tweleele05)
## [1] 30
#train
train_tweleele05 <- train (coefs.v_tweleele05)
#test
test_tweleele05 <- test (coefs.v_tweleele05)
#all
all_tweleele05 <- all (coefs.v_tweleele05)
alpha_points<-c(0.3, 0.2,0.1,0.05)
pvalues_tweleele_values_train<-c(train_tweleele03$pvalue,train_tweleele02$pvalue,train_tweleele01$pvalue,train_tweleele05$pvalue)
pvalues_tweleele_values_test<-c(test_tweleele03$pvalue,test_tweleele02$pvalue,test_tweleele01$pvalue,test_tweleele05$pvalue)
pvalues_tweleele_values_all<-c(all_tweleele03$pvalue,all_tweleele02$pvalue,all_tweleele01$pvalue,all_tweleele05$pvalue)
pvalues_tweleele_values<-c(train_tweleele03$pvalue,train_tweleele02$pvalue,train_tweleele01$pvalue,train_tweleele05$pvalue,test_tweleele03$pvalue,test_tweleele02$pvalue,test_tweleele01$pvalue,test_tweleele05$pvalue,all_tweleele03$pvalue,all_tweleele02$pvalue,all_tweleele01$pvalue,all_tweleele05$pvalue)
pvalues_tweleele_method<-c(rep("pvalues TCox (1-w)^3 train set",4),rep("pvalues TCox (1-w)^3 test set",4),rep("pvalues TCox (1-w)^3 all set",4))
pvalues_tweleele<-data.frame(alpha_points,pvalues_tweleele_values,pvalues_tweleele_method)
p_pvalues_tweleele<-ggplot(data = pvalues_tweleele, aes(x = alpha_points, y = pvalues_tweleele_values,color= pvalues_tweleele_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_tweleele+ ylim(0, max(pvalues_tweleele_values))
alpha_points<-c(0.3,0.2,0.1,0.05)
#1-w
pvalues_tw1i_values_test<-c(test_tw1i03$pvalue,test_tw1i02$pvalue,test_tw1i01$pvalue,test_tw1i05$pvalue)
#1/w
pvalues_twii_values_test<-c(test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue)
#exp((1-w)^3)
pvalues_twexp_values_test<-c(test_twexp03$pvalue,test_twexp02$pvalue,test_twexp01$pvalue,test_twexp05$pvalue)
#exp(-w^3)
pvalues_twexp2_values_test<-c(test_twexp203$pvalue,test_twexp202$pvalue,test_twexp201$pvalue,test_twexp205$pvalue)
#1-w^3
pvalues_twele_values_test<-c(test_twele03$pvalue,test_twele02$pvalue,test_twele01$pvalue,test_twele05$pvalue)
#(1-w)^3
pvalues_tweleele_values_test<-c(test_tweleele03$pvalue,test_tweleele02$pvalue,test_tweleele01$pvalue,test_tweleele05$pvalue)
pvalues_test<-c(pvalues_tw1i_values_test,pvalues_twii_values_test,pvalues_twexp_values_test,pvalues_twexp2_values_test,pvalues_twele_values_test,pvalues_tweleele_values_test)
pvalues_method_test<-c( rep("1-w",4), rep("1/w",4), rep("exp((1-w)^3)",4), rep("exp(-w^3)",4),rep("1-w^3 ",4),rep("(1-w)^3",4))
pvalues_test_data<-data.frame(alpha_points,pvalues_test,pvalues_method_test)
p_pvalues_test<-ggplot(data = pvalues_test_data, aes(x = alpha_points, y = pvalues_test,color= pvalues_method_test)) + geom_hline(yintercept=0.05) + geom_line()+geom_point()+theme_minimal()
p_pvalues_test+ ylim(0, max(pvalues_test))
Train and Test curves obtained using 1/w weight vector and alpha = 0.1
#train
train_twii01
## $pvalue
## [1] 2.664447e-09
##
## $plot
##
## $km
## Call: survfit(formula = survival::Surv(time, status) ~ group, data = prognostic.index.df)
##
## n events median 0.95LCL 0.95UCL
## Low risk 125 8 NA NA NA
## High risk 124 42 1422 1126 1741
#test
test_twii01
## $pvalue
## [1] 0.01944886
##
## $plot
##
## $km
## Call: survfit(formula = survival::Surv(time, status) ~ group, data = prognostic.index.df)
##
## n events median 0.95LCL 0.95UCL
## Low risk 54 7 NA 2003 NA
## High risk 54 15 1910 992 NA
cv.fit_elastic_net3<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid,
alpha = 0.3,
nlambda = 100,
#lambda.min.ratio = 10^(-3),
nfolds= params$nfolds)
coefs.v_elastic3 <- coef(cv.fit_elastic_net3, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_elastic3 <- which(cv.fit_elastic_net3$glmnet.fit$beta[,which(cv.fit_elastic_net3$cvm == min(cv.fit_elastic_net3$cvm))] != 0)
length(fit.selected_elastic3)
## [1] 18
names(fit.selected_elastic3)
## [1] "AFF3" "CLDN9" "ELFN1" "FAM129C" "FAM138B" "FAM159A"
## [7] "GRAPL" "KCNMB3" "LBX2" "LOC220930" "LOC646498" "LOC732275"
## [13] "MBL1P" "NKX1-2" "PBX4" "PGAM2" "PI4K2B" "PRCD"
train_en03<-separate2GroupsCox(as.vector(coefs.v_elastic3),
xdata.train[, names(coefs.v_elastic3)],
ydata.train,
plot.title = 'Train Set Elastic Net, alpha=0.3', legend.outside = FALSE)
train_en03$pvalue
## [1] 8.387034e-07
test_en03<-separate2GroupsCox(as.vector(coefs.v_elastic3),
xdata.test[, names(coefs.v_elastic3)],
ydata.test,
plot.title = 'Test Set Elastic Net, alpha=0.3', legend.outside = FALSE)
test_en03$pvalue
## [1] 0.00882275
all_en03<-separate2GroupsCox(as.vector(coefs.v_elastic3),
xdata[, names(coefs.v_elastic3)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data Elastic Net, alpha=0.3', legend.outside = FALSE,
break.x.by = 12)
cv.fit_elastic_net2<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid,
alpha = 0.2,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds)
coefs.v_elastic2 <- coef(cv.fit_elastic_net2, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_elastic2 <- which(cv.fit_elastic_net2$glmnet.fit$beta[,which(cv.fit_elastic_net2$cvm == min(cv.fit_elastic_net2$cvm))] != 0)
length(fit.selected_elastic2)
## [1] 47
names(fit.selected_elastic2)
## [1] "AASS" "AFF3" "BACH2" "C1orf185" "C7orf13"
## [6] "CLDN9" "CLEC17A" "CLEC18C" "CYP7A1" "DCAF10"
## [11] "EEPD1" "ELFN1" "ENO3" "FAM129C" "FAM138B"
## [16] "FAM159A" "FATE1" "FCRL1" "GRAPL" "ISL2"
## [21] "KCNMB3" "LBX2" "LOC100130238" "LOC220930" "LOC646498"
## [26] "LOC732275" "MBL1P" "MEIG1" "MYF6" "MYH8"
## [31] "NGLY1" "NKAIN4" "NKX1-2" "PBX4" "PCDHB12"
## [36] "PGAM2" "PI4K2B" "PRCD" "PTPN6" "RFPL4B"
## [41] "RPP14" "SIX2" "TCL1A" "TMEM86B" "UNC45B"
## [46] "ZDHHC19" "ZNF883"
train_en02<-separate2GroupsCox(as.vector(coefs.v_elastic2),
xdata.train[, names(coefs.v_elastic2)],
ydata.train,
plot.title = 'Train Set Elastic Net, alpha=0.2', legend.outside = FALSE)
train_en02$pvalue
## [1] 2.474278e-08
test_en02<-separate2GroupsCox(as.vector(coefs.v_elastic2),
xdata.test[, names(coefs.v_elastic2)],
ydata.test,
plot.title = 'Test Set Elastic Net, alpha=0.2', legend.outside = FALSE)
test_en02$pvalue
## [1] 0.07168201
all_en02<-separate2GroupsCox(as.vector(coefs.v_elastic2),
xdata[, names(coefs.v_elastic2)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data Elastic Net, alpha=0.2', legend.outside = FALSE,
break.x.by = 12)
cv.fit_elastic_net1<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid,
alpha = 0.1,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds)
coefs.v_elastic1 <- coef(cv.fit_elastic_net1, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_elastic1 <- which(cv.fit_elastic_net1$glmnet.fit$beta[,which(cv.fit_elastic_net1$cvm == min(cv.fit_elastic_net1$cvm))] != 0)
length(fit.selected_elastic1)
## [1] 88
names(fit.selected_elastic1)
## [1] "AASS" "AFF3" "BACH2" "BLK" "C10orf114"
## [6] "C14orf72" "C1orf185" "C2CD4C" "C7orf13" "C9orf82"
## [11] "CABP5" "CAPRIN2" "CCDC126" "CD19" "CDH18"
## [16] "CLDN9" "CLEC17A" "CLEC18C" "CLVS2" "CST2"
## [21] "CUZD1" "CYP7A1" "DCAF10" "DNAI2" "EEPD1"
## [26] "EGFL7" "ELFN1" "EML6" "ENO3" "FAM129C"
## [31] "FAM138B" "FAM159A" "FAM81B" "FATE1" "FCER2"
## [36] "FCRL1" "GABRD" "GJA3" "GRAPL" "GUCA1B"
## [41] "HOTAIR" "HPCAL1" "ISL2" "KCNMB3" "LBX2"
## [46] "LOC100130238" "LOC220930" "LOC283663" "LOC646498" "LOC732275"
## [51] "LRP2BP" "MBL1P" "MEF2B" "MEIG1" "MYF6"
## [56] "MYH8" "NCF1C" "NEK4" "NELF" "NGLY1"
## [61] "NKAIN4" "NKX1-2" "PAX5" "PBX4" "PCDHB12"
## [66] "PGAM2" "PI4K2B" "PRCD" "PRR4" "PTPN6"
## [71] "PYROXD2" "RASA4P" "RFPL4B" "RPP14" "SHOX2"
## [76] "SIX2" "SLC25A18" "TAS2R20" "TCF15" "TCL1A"
## [81] "TERT" "TMEM86B" "TMEM90B" "TUBA8" "UNC45B"
## [86] "ZDHHC19" "ZNF607" "ZNF883"
train_en01<-separate2GroupsCox(as.vector(coefs.v_elastic1),
xdata.train[, names(coefs.v_elastic1)],
ydata.train,
plot.title = 'Train Set Elastic Net, alpha=0.1', legend.outside = FALSE)
train_en01$pvalue
## [1] 5.28787e-09
test_en01<-separate2GroupsCox(as.vector(coefs.v_elastic1),
xdata.test[, names(coefs.v_elastic1)],
ydata.test,
plot.title = 'Test Set Elastic Net, alpha=0.1', legend.outside = FALSE)
test_en01$pvalue
## [1] 0.04921873
all_en01<-separate2GroupsCox(as.vector(coefs.v_elastic1),
xdata[, names(coefs.v_elastic1)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data Elastic Net, alpha=0.1', legend.outside = FALSE,
break.x.by = 12)
cv.fit_elastic_net05<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid,
alpha = 0.05,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds)
coefs.v_elastic05 <- coef(cv.fit_elastic_net05, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_elastic05 <- which(cv.fit_elastic_net05$glmnet.fit$beta[,which(cv.fit_elastic_net05$cvm == min(cv.fit_elastic_net05$cvm))] != 0)
length(fit.selected_elastic05)
## [1] 133
train_en05<-separate2GroupsCox(as.vector(coefs.v_elastic05),
xdata.train[, names(coefs.v_elastic05)],
ydata.train,
plot.title = 'Train Set Elastic Net, alpha=0.05', legend.outside = FALSE)
test_en05<-separate2GroupsCox(as.vector(coefs.v_elastic05),
xdata.test[, names(coefs.v_elastic05)],
ydata.test,
plot.title = 'Test Set Elastic Net, alpha=0.05', legend.outside = FALSE)
all_en05<-separate2GroupsCox(as.vector(coefs.v_elastic05),
xdata[, names(coefs.v_elastic05)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data Elastic Net, alpha=0.05', legend.outside = FALSE,
break.x.by = 12)
#####1 alpha=0.04
cv.fit_elastic_net04<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid,
alpha = 0.04,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds)
coefs.v_elastic04 <- coef(cv.fit_elastic_net04, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_elastic04 <- which(cv.fit_elastic_net04$glmnet.fit$beta[,which(cv.fit_elastic_net04$cvm == min(cv.fit_elastic_net04$cvm))] != 0)
length(fit.selected_elastic04)
## [1] 141
train_en04<-separate2GroupsCox(as.vector(coefs.v_elastic04),
xdata.train[, names(coefs.v_elastic04)],
ydata.train,
plot.title = 'Train Set Elastic Net, alpha=0.04', legend.outside = FALSE)
test_en04<-separate2GroupsCox(as.vector(coefs.v_elastic04),
xdata.test[, names(coefs.v_elastic04)],
ydata.test,
plot.title = 'Test Set Elastic Net, alpha=0.04', legend.outside = FALSE)
all_en04<-separate2GroupsCox(as.vector(coefs.v_elastic04),
xdata[, names(coefs.v_elastic04)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data Elastic Net, alpha=0.04', legend.outside = FALSE,
break.x.by = 12)
alpha_points<-c(0.3,0.2,0.1,0.05)
pvalues_en_train<-c(train_en03$pvalue,train_en02$pvalue,train_en01$pvalue,train_en05$pvalue)
pvalues_en_test<-c(test_en03$pvalue,test_en02$pvalue,test_en01$pvalue,test_en05$pvalue)
pvalues_en_all<-c(all_en03$pvalue,all_en02$pvalue,all_en01$pvalue,all_en05$pvalue)
pvalues_en_values<-c(train_en03$pvalue,train_en02$pvalue,train_en01$pvalue,train_en05$pvalue,
test_en03$pvalue,test_en02$pvalue,test_en01$pvalue,test_en05$pvalue,
all_en03$pvalue,all_en02$pvalue,all_en01$pvalue,all_en05$pvalue)
pvalues_en_method<-c(rep("pvalues en train set",4),rep("pvalues en test set",4),rep("pvalues en all set",4))
cv.fit_hub3<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.3,
nlambda = 100,
#lambda.min.ratio = 10^(-3),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.005, min.degree = 0.6))
coefs.v_hub3 <- coef(cv.fit_hub3, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_hub3 <- which(cv.fit_hub3$glmnet.fit$beta[,which(cv.fit_hub3$cvm == min(cv.fit_hub3$cvm))] != 0)
length(fit.selected_hub3)
## [1] 26
names(fit.selected_hub3)
## [1] "AASS" "AFF3" "BACH2" "C7orf13" "CLDN9" "DCAF10"
## [7] "EEPD1" "ELFN1" "FAM129C" "FAM138B" "FAM159A" "FCRL1"
## [13] "GRAPL" "KCNMB3" "LBX2" "LOC220930" "LOC646498" "MBL1P"
## [19] "PBX4" "PCDHB12" "PGAM2" "PI4K2B" "PRCD" "TMEM86B"
## [25] "UNC45B" "ZDHHC19"
train_hub03<-separate2GroupsCox(as.vector(coefs.v_hub3),
xdata.train[, names(coefs.v_hub3)],
ydata.train,
plot.title = 'Train Set hub Net, alpha=0.3', legend.outside = FALSE)
train_hub03$pvalue
## [1] 1.788036e-08
test_hub03<-separate2GroupsCox(as.vector(coefs.v_hub3),
xdata.test[, names(coefs.v_hub3)],
ydata.test,
plot.title = 'Test Set hub Net, alpha=0.3', legend.outside = FALSE)
test_hub03$pvalue
## [1] 0.01382812
all_hub03<-separate2GroupsCox(as.vector(coefs.v_hub3),
xdata[, names(coefs.v_hub3)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data hub Net, alpha=0.3', legend.outside = FALSE,
break.x.by = 12)
cv.fit_hub2<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.2,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.005, min.degree = 0.6))
coefs.v_hub2 <- coef(cv.fit_hub2, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_hub2 <- which(cv.fit_hub2$glmnet.fit$beta[,which(cv.fit_hub2$cvm == min(cv.fit_hub2$cvm))] != 0)
length(fit.selected_hub2)
## [1] 47
names(fit.selected_hub2)
## [1] "AASS" "AFF3" "BACH2" "BLK" "C14orf72" "C7orf13"
## [7] "CLDN9" "CLEC17A" "DCAF10" "EEPD1" "EGFL7" "ELFN1"
## [13] "ENO3" "FAM129C" "FAM138B" "FAM159A" "FCRL1" "GRAPL"
## [19] "KCNMB3" "LBX2" "LOC220930" "LOC646498" "LOC732275" "MBL1P"
## [25] "MEIG1" "MYF6" "MYH8" "NCF1C" "NGLY1" "NKAIN4"
## [31] "NKX1-2" "PAX5" "PBX4" "PCDHB12" "PGAM2" "PI4K2B"
## [37] "PRCD" "PTPN6" "RPP14" "SHOX2" "SIX2" "TCL1A"
## [43] "TMEM86B" "TMEM90B" "UNC45B" "ZDHHC19" "ZNF883"
train_hub02<-separate2GroupsCox(as.vector(coefs.v_hub2),
xdata.train[, names(coefs.v_hub2)],
ydata.train,
plot.title = 'Train Set hub Net, alpha=0.2', legend.outside = FALSE)
train_hub02$pvalue
## [1] 1.18224e-08
test_hub02<-separate2GroupsCox(as.vector(coefs.v_hub2),
xdata.test[, names(coefs.v_hub2)],
ydata.test,
plot.title = 'Test Set hub Net, alpha=0.2', legend.outside = FALSE)
test_hub02$pvalue
## [1] 0.01288524
all_hub02<-separate2GroupsCox(as.vector(coefs.v_hub2),
xdata[, names(coefs.v_hub2)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data hub Net, alpha=0.2', legend.outside = FALSE,
break.x.by = 12)
cv.fit_hub1<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.1,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.005, min.degree = 0.6))
coefs.v_hub1 <- coef(cv.fit_hub1, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_hub1 <- which(cv.fit_hub1$glmnet.fit$beta[,which(cv.fit_hub1$cvm == min(cv.fit_hub1$cvm))] != 0)
length(fit.selected_hub1)
## [1] 90
names(fit.selected_hub1)
## [1] "AASS" "AFF3" "BACH2" "BEND4" "BLK"
## [6] "C10orf114" "C14orf72" "C1orf185" "C2CD4C" "C7orf13"
## [11] "C9orf82" "CAPRIN2" "CCDC126" "CD19" "CDH18"
## [16] "CLDN9" "CLEC17A" "CLEC18C" "CLVS2" "CST2"
## [21] "CUZD1" "CYGB" "CYP27C1" "CYP7A1" "DCAF10"
## [26] "EEPD1" "EGFL7" "ELFN1" "EML6" "ENO3"
## [31] "FAM129C" "FAM138B" "FAM159A" "FAM24B" "FATE1"
## [36] "FCRL1" "GABRD" "GRAPL" "HPCAL1" "IFT74"
## [41] "ISL2" "KCNMB3" "LBX2" "LIPT2" "LOC100130238"
## [46] "LOC220930" "LOC285074" "LOC646498" "LOC732275" "LRP2BP"
## [51] "MBL1P" "MEF2B" "MEIG1" "MYF6" "MYH8"
## [56] "NCF1C" "NEK4" "NGLY1" "NKAIN4" "NKX1-2"
## [61] "PAX5" "PBX4" "PCDHB12" "PGAM2" "PI4K2B"
## [66] "PRCD" "PTPN6" "PYROXD2" "RASA4P" "RASGRP2"
## [71] "RFPL4B" "RFT1" "RPP14" "SHOX2" "SIX2"
## [76] "SLC25A18" "SLFN11" "TAS2R20" "TCF15" "TCL1A"
## [81] "TERT" "TMEM86B" "TMEM90B" "TUBA8" "UNC13B"
## [86] "UNC45B" "WBSCR27" "ZDHHC19" "ZNF607" "ZNF883"
train_hub01<-separate2GroupsCox(as.vector(coefs.v_hub1),
xdata.train[, names(coefs.v_hub1)],
ydata.train,
plot.title = 'Train Set hub Net, alpha=0.1', legend.outside = FALSE)
train_hub01$pvalue
## [1] 2.741041e-09
test_hub01<-separate2GroupsCox(as.vector(coefs.v_hub1),
xdata.test[, names(coefs.v_hub1)],
ydata.test,
plot.title = 'Test Set hub Net, alpha=0.1', legend.outside = FALSE)
test_hub01$pvalue
## [1] 0.04180592
all_hub01<-separate2GroupsCox(as.vector(coefs.v_hub1),
xdata[, names(coefs.v_hub1)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data hub Net, alpha=0.1', legend.outside = FALSE,
break.x.by = 12)
cv.fit_hub05<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.05,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.005, min.degree = 0.6))
coefs.v_hub05 <- coef(cv.fit_hub05, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_hub05 <- which(cv.fit_hub05$glmnet.fit$beta[,which(cv.fit_hub05$cvm == min(cv.fit_hub05$cvm))] != 0)
length(fit.selected_hub05)
## [1] 163
train_hub05<-separate2GroupsCox(as.vector(coefs.v_hub05),
xdata.train[, names(coefs.v_hub05)],
ydata.train,
plot.title = 'Train Set hub Net, alpha=0.05', legend.outside = FALSE)
test_hub05<-separate2GroupsCox(as.vector(coefs.v_hub05),
xdata.test[, names(coefs.v_hub05)],
ydata.test,
plot.title = 'Test Set hub Net, alpha=0.05', legend.outside = FALSE)
all_hub05<-separate2GroupsCox(as.vector(coefs.v_hub05),
xdata[, names(coefs.v_hub05)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data hub Net, alpha=0.05', legend.outside = FALSE,
break.x.by = 12)
alpha_points<-c(0.3, 0.2,0.1,0.05)
pvalues_hub_train<-c(train_hub03$pvalue,train_hub02$pvalue,train_hub01$pvalue,train_hub05$pvalue)
pvalues_hub_test<-c(test_hub03$pvalue,test_hub02$pvalue,test_hub01$pvalue,test_hub05$pvalue)
pvalues_hub_all<-c(all_hub03$pvalue,all_hub02$pvalue,all_hub01$pvalue,all_hub05$pvalue)
pvalues_hub_values<-c(train_hub03$pvalue,train_hub02$pvalue,train_hub01$pvalue,train_hub05$pvalue,
test_hub03$pvalue,test_hub02$pvalue,test_hub01$pvalue,test_hub05$pvalue,
all_hub03$pvalue,all_hub02$pvalue,all_hub01$pvalue,all_hub05$pvalue)
pvalues_hub_method<-c(rep("pvalues hub train set",4),rep("pvalues hub test set",4),rep("pvalues hub all set",4))
pvalues_hub<-data.frame(alpha_points,pvalues_hub_values,pvalues_hub_method)
p_pvalues_hub<-ggplot(data = pvalues_hub, aes(x = alpha_points, y = pvalues_hub_values,color= pvalues_hub_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_hub+ ylim(0, max(pvalues_hub_values))
cv.fit_orphan3<-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.3,
nlambda = 100,
#lambda.min.ratio = 10^(-3),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.001, min.degree = 0.6))
coefs.v_orphan3 <- coef(cv.fit_orphan3, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_orphan3 <- which(cv.fit_orphan3$glmnet.fit$beta[,which(cv.fit_orphan3$cvm == min(cv.fit_orphan3$cvm))] != 0)
length(fit.selected_orphan3)
## [1] 8
names(fit.selected_orphan3)
## [1] "AFF3" "CLDN9" "ELFN1" "FAM138B" "GRAPL" "KCNMB3"
## [7] "LBX2" "LOC646498"
train_orphan03<-separate2GroupsCox(as.vector(coefs.v_orphan3),
xdata.train[, names(coefs.v_orphan3)],
ydata.train,
plot.title = 'Train Set orphan Net, alpha=0.3', legend.outside = FALSE)
train_orphan03$pvalue
## [1] 2.489645e-05
test_orphan03<-separate2GroupsCox(as.vector(coefs.v_orphan3),
xdata.test[, names(coefs.v_orphan3)],
ydata.test,
plot.title = 'Test Set orphan Net, alpha=0.3', legend.outside = FALSE)
test_orphan03$pvalue
## [1] 0.1518845
all_orphan03<-separate2GroupsCox(as.vector(coefs.v_orphan3),
xdata[, names(coefs.v_orphan3)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data orphan Net, alpha=0.3', legend.outside = FALSE,
break.x.by = 12)
cv.fit_orphan2<-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.2,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.001, min.degree = 0.6))
coefs.v_orphan2 <- coef(cv.fit_orphan2, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_orphan2 <- which(cv.fit_orphan2$glmnet.fit$beta[,which(cv.fit_orphan2$cvm == min(cv.fit_orphan2$cvm))] != 0)
length(fit.selected_orphan2)
## [1] 44
names(fit.selected_orphan2)
## [1] "AFF3" "BACH2" "C7orf13" "CLDN9" "CLEC17A"
## [6] "CLEC18C" "CYP7A1" "DCAF10" "EEPD1" "ELFN1"
## [11] "ENO3" "FAM129C" "FAM138B" "FAM159A" "FATE1"
## [16] "FCRL1" "GRAPL" "ISL2" "KCNMB3" "LBX2"
## [21] "LOC100130238" "LOC220930" "LOC646498" "LOC732275" "MBL1P"
## [26] "MEIG1" "MYF6" "MYH8" "NKAIN4" "NKX1-2"
## [31] "PBX4" "PCDHB12" "PGAM2" "PI4K2B" "PRCD"
## [36] "PTPN6" "RFPL4B" "RPP14" "SIX2" "TCL1A"
## [41] "TMEM86B" "UNC45B" "ZDHHC19" "ZNF883"
train_orphan02<-separate2GroupsCox(as.vector(coefs.v_orphan2),
xdata.train[, names(coefs.v_orphan2)],
ydata.train,
plot.title = 'Train Set orphan Net, alpha=0.2', legend.outside = FALSE)
train_orphan02$pvalue
## [1] 1.204941e-07
test_orphan02<-separate2GroupsCox(as.vector(coefs.v_orphan2),
xdata.test[, names(coefs.v_orphan2)],
ydata.test,
plot.title = 'Test Set orphan Net, alpha=0.2', legend.outside = FALSE)
test_orphan02$pvalue
## [1] 0.03266713
all_orphan02<-separate2GroupsCox(as.vector(coefs.v_orphan2),
xdata[, names(coefs.v_orphan2)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data orphan Net, alpha=0.2', legend.outside = FALSE,
break.x.by = 12)
cv.fit_orphan1 <-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.1,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.001, min.degree = 0.6))
coefs.v_orphan1 <- coef(cv.fit_orphan1, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_orphan1 <- which(cv.fit_orphan1$glmnet.fit$beta[,which(cv.fit_orphan1$cvm == min(cv.fit_orphan1$cvm))] != 0)
length(fit.selected_orphan1)
## [1] 67
names(fit.selected_orphan1)
## [1] "AASS" "AFF3" "BACH2" "BLK" "C1orf185"
## [6] "C7orf13" "CABP5" "CAPRIN2" "CCDC126" "CD19"
## [11] "CDH18" "CLDN9" "CLEC17A" "CLEC18C" "CLVS2"
## [16] "CYP7A1" "DCAF10" "DPPA2" "EEPD1" "ELFN1"
## [21] "EML6" "ENO3" "FAM129C" "FAM138B" "FAM159A"
## [26] "FAM81B" "FATE1" "FCER2" "FCRL1" "GABRD"
## [31] "GRAPL" "HPCAL1" "ISL2" "KCNMB3" "LBX2"
## [36] "LOC100130238" "LOC220930" "LOC646498" "LOC732275" "LRP2BP"
## [41] "MBL1P" "MEIG1" "MYF6" "MYH8" "NGLY1"
## [46] "NKAIN4" "NKX1-2" "PAX5" "PBX4" "PCDHB12"
## [51] "PGAM2" "PI4K2B" "PRCD" "PRR4" "PTPN6"
## [56] "RASA4P" "RFPL4B" "RPP14" "SIX2" "SLC25A18"
## [61] "TCF15" "TCL1A" "TMEM86B" "TUBA8" "UNC45B"
## [66] "ZDHHC19" "ZNF883"
train_orphan01<-separate2GroupsCox(as.vector(coefs.v_orphan1),
xdata.train[, names(coefs.v_orphan1)],
ydata.train,
plot.title = 'Train Set orphan Net, alpha=0.1', legend.outside = FALSE)
train_orphan01$pvalue
## [1] 6.802484e-09
test_orphan01<-separate2GroupsCox(as.vector(coefs.v_orphan1),
xdata.test[, names(coefs.v_orphan1)],
ydata.test,
plot.title = 'Test Set orphan Net, alpha=0.1', legend.outside = FALSE)
test_orphan01$pvalue
## [1] 0.06322569
all_orphan01<-separate2GroupsCox(as.vector(coefs.v_orphan1),
xdata[, names(coefs.v_orphan1)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data orphan Net, alpha=0.1', legend.outside = FALSE,
break.x.by = 12)
cv.fit_orphan05<-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train),
family = 'cox',foldid = my_foldid, network = "correlation",
alpha = 0.05,
nlambda = 100,
#lambda.min.ratio = 10^(-2),
nfolds= params$nfolds, network.options = networkOptions(cutoff = 0.001, min.degree = 0.6))
coefs.v_orphan05 <- coef(cv.fit_orphan05, s = 'lambda.min')[,1] %>% { .[. != 0]}
fit.selected_orphan05 <- which(cv.fit_orphan05$glmnet.fit$beta[,which(cv.fit_orphan05$cvm == min(cv.fit_orphan05$cvm))] != 0)
length(fit.selected_orphan05)
## [1] 99
train_orphan05<-separate2GroupsCox(as.vector(coefs.v_orphan05),
xdata.train[, names(coefs.v_orphan05)],
ydata.train,
plot.title = 'Train Set orphan Net, alpha=0.05', legend.outside = FALSE)
test_orphan05<-separate2GroupsCox(as.vector(coefs.v_orphan05),
xdata.test[, names(coefs.v_orphan05)],
ydata.test,
plot.title = 'Test Set orphan Net, alpha=0.05', legend.outside = FALSE)
all_orphan05<-separate2GroupsCox(as.vector(coefs.v_orphan05),
xdata[, names(coefs.v_orphan05)],
ydata %>% mutate(time = time / 365 * 12),
plot.title = 'All data orphan Net, alpha=0.05', legend.outside = FALSE,
break.x.by = 12)
alpha_points<-c(0.3,0.2,0.1,0.05)
pvalues_orphan_train<-c(train_orphan03$pvalue,train_orphan02$pvalue,train_orphan01$pvalue,train_orphan05$pvalue)
pvalues_orphan_test<-c(test_orphan03$pvalue,test_orphan02$pvalue,test_orphan01$pvalue,test_orphan05$pvalue)
pvalues_orphan_all<-c(all_orphan03$pvalue,all_orphan02$pvalue,all_orphan01$pvalue,all_orphan05$pvalue)
pvalues_orphan_values<-c(train_orphan03$pvalue,train_orphan02$pvalue,train_orphan01$pvalue,train_orphan05$pvalue,
test_orphan03$pvalue,test_orphan02$pvalue,test_orphan01$pvalue,test_orphan05$pvalue,
all_orphan03$pvalue,all_orphan02$pvalue,all_orphan01$pvalue,all_orphan05$pvalue)
pvalues_orphan_method<-c(rep("pvalues orphan train set",4),rep("pvalues orphan test set",4),rep("pvalues orphan all set",4))
pvalues_orphan<-data.frame(alpha_points,pvalues_orphan_values,pvalues_orphan_method)
p_pvalues_orphan<-ggplot(data = pvalues_orphan, aes(x = alpha_points, y = pvalues_orphan_values,color= pvalues_orphan_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_orphan+ ylim(0, max(pvalues_orphan_values))
alpha_points<-c(0.3, 0.2,0.1,0.05)
#TCox (exp(-w^3))
pvalues_twexp2_values_test<-c(test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue)
#EN
pvalues_en_test<-c(test_en03$pvalue,test_en02$pvalue,test_en01$pvalue,test_en05$pvalue)
#Hub
pvalues_hub_test<-c(test_hub03$pvalue,test_hub02$pvalue,test_hub01$pvalue,test_hub05$pvalue)
#Orphan
pvalues_orphan_test<-c(test_orphan03$pvalue,test_orphan02$pvalue,test_orphan01$pvalue,test_orphan05$pvalue)
pvalues_test<-c(pvalues_twexp2_values_test, pvalues_en_test, pvalues_hub_test, pvalues_orphan_test)
Method <-c(rep("TCox",4), rep("EN",4), rep("HubCox",4), rep("OrphanCox",4))
pvalues_test_data<-data.frame(alpha_points,pvalues_test,Method)
p_pvalues_test<-ggplot(data = pvalues_test_data, aes(x = alpha_points, y = pvalues_test,color= Method)) + geom_hline(yintercept=0.05) + geom_line()+geom_point()+theme_minimal()
p_pvalues_test+ ylim(0, max(pvalues_test))
alpha_points<-c(0.3, 0.2,0.1,0.05)
#TCox (exp(-w^3))
pvalues_twexp2_values_train<-c(train_twexp203$pvalue,train_twexp202$pvalue,train_twexp201$pvalue,train_twexp205$pvalue)
#EN
pvalues_en_train<-c(train_en03$pvalue,train_en02$pvalue,train_en01$pvalue,train_en05$pvalue)
#Hub
pvalues_hub_train<-c(train_hub03$pvalue,train_hub02$pvalue,train_hub01$pvalue,train_hub05$pvalue)
#Orphan
pvalues_orphan_train<-c(train_orphan03$pvalue,train_orphan02$pvalue,train_orphan01$pvalue,train_orphan05$pvalue)
pvalues_train<-c(pvalues_twexp2_values_train, pvalues_en_train, pvalues_hub_train, pvalues_orphan_train)
Method <-c(rep("TCox",4), rep("EN",4), rep("HubCox",4), rep("OrphanCox",4))
pvalues_train_data<-data.frame(alpha_points,pvalues_train,Method)
p_pvalues_train<-ggplot(data = pvalues_train_data, aes(x = alpha_points, y = pvalues_train,color= Method)) + geom_hline(yintercept=0.05) + geom_line()+geom_point()+theme_minimal()
p_pvalues_train+ ylim(0, max(pvalues_train))
by all models
genes <- tumorf[,c("CYP7A1","FAM159A","LOC732275","CLDN9","LBX2","MEIG1","PAX5","NKAIN4","ZDHHC19","GRAPL","PCDHB12","EEPD1","HPCAL1","PGAM2","ZNF883","FAM138B","LOC646498","PRCD")]
geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))
genes selected only by:
EN
genes <- tumorf[,c("HOTAIR","GJA3","LOC283663","DNAI2","NELF","GUCA1B")]
geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))
glmSparseNet
genes <- tumorf[,c("CYGB","UNC13B","LIPT2","RFT1","BEND4","FAM24B","RASGRP2","SLFN11")]
geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))
TCox
genes <- tumorf[,c("ANKRD26P1","CARKD","IGLON5","OSTN","RAB20","TXNL4B","AOX2P","DCLK3","FCRL2","SEPT7P2","ASPHD1","COL19A1","DCP1A","FLJ16779","LOC100303728","PCDHA7","SNTG1","COX4I2","NXF2B","TAC3","C20orf106","LOC285780","OR2T5","TERF2IP","CAPN7","OSBPL3","TRIM67")]
geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage ivb"] <- 4
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iva"] <- 4
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iv"] <- 4
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iiic"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iiib"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iiia"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iii"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iic"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iib"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iia"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage ii"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage ia"] <- 1
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage i"] <- 1
pathology_stage <- as.data.frame(clinic$neoplasm_diseasestage)
pathology_stage$names <- rownames(clinic)
pathology_stage$names <- toupper(pathology_stage$names)
pathology_stage$names <- chartr('.', '-', pathology_stage$names)
pathology_stage <- na.omit(pathology_stage)
row.names(pathology_stage) <- pathology_stage$names
pathology_stage <- as.data.frame(pathology_stage)
pathology_stage$`clinic$neoplasm_diseasestage` <- as.numeric(pathology_stage$`clinic$neoplasm_diseasestage`)
pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "1"] <- "early"
pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "2"] <- "early"
pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "3"] <- "early"
pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "4"] <- "advanced"
all_data = merge(tumorf,pathology_stage,by="row.names")
#rownames(all_data) <- all_data$submitter_id
xdata <- apply(as.matrix(all_data[,2:20502]),2,as.numeric)
xdata <- xdata[ ,- as.numeric(which(apply(xdata, 2, var) == 0))]
xdata <- t(xdata)
group <- all_data$`clinic$neoplasm_diseasestage`
y <- DGEList(counts=xdata,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
# plotMDS(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)
# y$samples
# levels(y$samples$group)
To perform quasi-likelihood F-tests (The quasi-likelihood method is highly recommended for differential expression analyses of bulk RNA-seq data as it gives stricter error rate control by accounting for the uncertainty in dispersion estimation)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit,coef=2)
# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient: groupearly
## logFC logCPM F PValue FDR
## XAGE1D -7.278711 -1.2348223 608.8796 5.266627e-71 8.974859e-67
## CT45A4 -5.190992 -2.8256847 595.3811 4.237922e-70 3.546180e-66
## CT45A3 -7.247917 -1.5793655 592.8965 6.242909e-70 3.546180e-66
## DKFZp686A1627 -5.847695 -2.5227528 590.9097 8.516663e-70 3.628311e-66
## DCAF4L2 -6.633494 -2.0817666 588.1865 1.305114e-69 4.448089e-66
## MAGEA10 -6.258910 -2.1003651 551.9612 4.360032e-67 1.238322e-63
## CXorf49B -8.160416 0.1161680 505.5592 1.098395e-63 2.673965e-60
## FOXR2 -4.804525 -2.8724963 477.0349 1.716385e-61 3.656115e-58
## CT45A1 -7.280180 -0.6235422 426.2823 2.264051e-57 4.286855e-54
## TUBA3C -4.771858 -2.5453413 357.1792 3.014495e-51 5.137001e-48
summary(decideTests(qlf))
## groupearly
## Down 602
## NotSig 16079
## Up 360
a <- qlf$table
a <- a[a$PValue < 0.05, ]
write.table(a, "tabela_genes_degs_earlyvsadv.csv", append = TRUE)
Plot log-fold change against log-counts per million, with DE genes highlighted:
plotMD(qlf)
abline(h=c(-1, 1), col="blue")
# tumor 373
# normal 60
tumorf <- t(tumorf)
normalf <- t(normalf)
all = merge(tumorf,normalf,by="row.names")
rownames(all) <- all$Row.names
all <- all[,-1]
all <- t(all)
all <- as.data.frame(all)
all$type[1:433] <- "normal"
all$type[1:373] <- "tumor"
xdatadeg <- apply(as.matrix(all[,1:20501]),2,as.numeric)
xdatadeg <- xdatadeg[,apply(xdatadeg!=0, 2, all)]
xdatadeg <- t(xdatadeg)
group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
# plotMDS(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)
# y$samples
# levels(y$samples$group)
To perform quasi-likelihood F-tests (The quasi-likelihood method is highly recommended for differential expression analyses of bulk RNA-seq data as it gives stricter error rate control by accounting for the uncertainty in dispersion estimation)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit,coef=2)
# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient: grouptumor
## logFC logCPM F PValue FDR
## UGP2 -1.907729 7.2692269 1066.7289 4.101999e-119 5.450736e-115
## CLEC3B -3.830019 3.9500075 880.0177 1.472141e-106 9.780906e-103
## CUBN -5.121942 1.8001048 864.0506 2.105275e-105 9.324966e-102
## A1BG -4.951522 2.7322544 854.6489 1.023940e-104 3.401530e-101
## PHLPP2 -2.385443 5.0304506 762.5613 1.040458e-97 2.765122e-94
## SULT1A2 -3.916172 2.2476161 756.2584 3.283824e-97 7.272575e-94
## KCNIP4 -2.696053 0.7653609 721.3317 2.144063e-94 4.070044e-91
## PEX26 -1.855932 5.3462240 694.9992 3.238848e-92 5.379727e-89
## ZNF575 -2.160721 1.9742580 687.2304 1.455565e-91 2.149060e-88
## PLCD1 -2.225580 4.6921304 668.6268 5.552143e-90 7.377688e-87
summary(decideTests(qlf))
## grouptumor
## Down 4639
## NotSig 3056
## Up 5593
b <- qlf$table
b <- b[a$PValue < 0.05, ]
write.table(b, "tabela_genes_degs_tumorvsnorm.csv", append = TRUE)
Plot log-fold change against log-counts per million, with DE genes highlighted:
plotMD(qlf)
abline(h=c(-1, 1), col="blue")
# tumor 373
# normal 60
few <- all[,c("CYP7A1",
"FAM159A",
"LOC732275",
"CLDN9",
"LBX2",
"MEIG1",
"PAX5",
"NKAIN4",
"ZDHHC19",
"GRAPL",
"PCDHB12",
"EEPD1",
"HPCAL1",
"PGAM2",
"ZNF883",
"FAM138B",
"LOC646498",
"PRCD","type")]
xdatadeg <- apply(as.matrix(few[,1:18]),2,as.numeric)
xdatadeg <- t(xdatadeg)
group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
# plotMDS(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)
# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit,coef=2)
# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient: grouptumor
## logFC logCPM F PValue FDR
## PRCD -0.9164964 11.52905 65.65138 5.480847e-15 9.865525e-14
## LBX2 1.1617787 12.46498 55.69423 4.644110e-13 4.179699e-12
## EEPD1 0.8073150 18.30913 51.18356 3.589396e-12 2.153638e-11
## PAX5 -1.8627698 13.27415 43.30143 1.350442e-10 6.076989e-10
## ZDHHC19 -1.2580156 10.27088 40.31242 5.447264e-10 1.961015e-09
## FAM159A -1.0008112 11.37126 30.59998 5.480797e-08 1.644239e-07
## CLDN9 1.3526544 14.05259 27.23531 2.793765e-07 7.183967e-07
## CYP7A1 -1.5118019 10.43878 26.68755 3.648163e-07 8.208366e-07
## PGAM2 -0.4782448 12.93011 14.72795 1.425790e-04 2.851580e-04
## NKAIN4 -0.7753835 11.82751 10.16699 1.532944e-03 2.759300e-03
summary(decideTests(qlf))
## grouptumor
## Down 9
## NotSig 6
## Up 3
decideTests(qlf)
## TestResults matrix
## grouptumor
## CYP7A1 -1
## FAM159A -1
## LOC732275 0
## CLDN9 1
## LBX2 1
## 13 more rows ...
plotMD(qlf)
abline(h=c(-1, 1), col="blue")
# tumor 373
# normal 60
few <- all[,c("ANKRD26P1",
"CARKD",
"IGLON5",
"OSTN",
"RAB20",
"TXNL4B",
"AOX2P",
"DCLK3",
"FCRL2",
"SEPT7P2",
"ASPHD1",
"COL19A1",
"DCP1A",
"FLJ16779",
"LOC100303728",
"PCDHA7",
"SNTG1",
"COX4I2",
"NXF2B",
"TAC3",
"C20orf106",
"LOC285780",
"OR2T5",
"TERF2IP",
"CAPN7",
"OSBPL3",
"TRIM67",
"type")]
xdatadeg <- apply(as.matrix(few[,1:27]),2,as.numeric)
xdatadeg <- t(xdatadeg)
group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
# plotMDS(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)
# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit,coef=2)
# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient: grouptumor
## logFC logCPM F PValue FDR
## OSBPL3 1.4432879 17.148381 256.00019 1.117076e-45 3.016104e-44
## COL19A1 -2.7128076 9.724384 221.93491 7.074003e-41 9.549904e-40
## ASPHD1 2.0497160 14.607284 108.02651 9.304629e-23 7.392128e-22
## FCRL2 -2.3175345 11.665128 107.62380 1.095130e-22 7.392128e-22
## LOC100303728 -0.8392998 11.259928 88.68043 2.703291e-19 1.459777e-18
## DCLK3 0.9927463 11.276039 42.95900 1.579131e-10 7.106088e-10
## SEPT7P2 0.3614066 14.166342 26.53498 3.925486e-07 1.514116e-06
## NXF2B 1.7926739 8.549050 24.11833 1.282660e-06 4.328979e-06
## TXNL4B 0.3175827 15.624186 23.63773 1.625180e-06 4.875539e-06
## FLJ16779 1.3952915 11.111728 22.81463 2.439858e-06 6.587616e-06
summary(decideTests(qlf))
## grouptumor
## Down 7
## NotSig 11
## Up 9
decideTests(qlf)
## TestResults matrix
## grouptumor
## ANKRD26P1 1
## CARKD 0
## IGLON5 0
## OSTN -1
## RAB20 0
## 22 more rows ...
qlf$table
## logFC logCPM F PValue
## ANKRD26P1 0.855862474 8.457990 13.08739072 3.067562e-03
## CARKD 0.014862117 17.642616 0.05936445 8.076177e-01
## IGLON5 0.436622984 10.830858 3.70631249 5.485624e-02
## OSTN -0.970454241 8.339538 161.97311151 1.157546e-05
## RAB20 0.011329195 17.239373 0.02430251 8.761897e-01
## TXNL4B 0.317582692 15.624186 23.63772694 1.625180e-06
## AOX2P 0.180206681 8.370341 1.51393915 5.112444e-01
## DCLK3 0.992746286 11.276039 42.95899786 1.579131e-10
## FCRL2 -2.317534452 11.665128 107.62379739 1.095130e-22
## SEPT7P2 0.361406595 14.166342 26.53497736 3.925486e-07
## ASPHD1 2.049716038 14.607284 108.02651292 9.304629e-23
## COL19A1 -2.712807613 9.724384 221.93491411 7.074003e-41
## DCP1A 0.191739065 17.009783 8.97049802 2.899824e-03
## FLJ16779 1.395291532 11.111728 22.81462741 2.439858e-06
## LOC100303728 -0.839299790 11.259928 88.68043132 2.703291e-19
## PCDHA7 -0.329656385 10.222402 2.48335938 1.157805e-01
## SNTG1 -0.223189775 8.346860 2.27166392 4.282530e-01
## COX4I2 0.094096864 10.866250 0.38219930 5.367509e-01
## NXF2B 1.792673898 8.549050 24.11833351 1.282660e-06
## TAC3 -0.695457641 11.132690 11.51492294 7.533715e-04
## C20orf106 0.151903163 8.817470 0.67892262 4.104078e-01
## LOC285780 -0.840445757 8.409863 24.66504888 2.171565e-03
## OR2T5 0.184824136 8.343255 3.47326852 3.940183e-01
## TERF2IP -0.040738071 17.329669 0.76677326 3.816986e-01
## CAPN7 0.008884825 16.667587 0.02615964 8.715859e-01
## OSBPL3 1.443287906 17.148381 256.00018660 1.117076e-45
## TRIM67 -0.493439909 10.085970 10.99062251 9.921363e-04
plotMD(qlf)
abline(h=c(-1, 1), col="blue")
## Tumor vs. normal genes selected by EN
# tumor 373
# normal 60
few <- all[,c("HOTAIR", "GJA3", "LOC283663", "DNAI2", "NELF", "GUCA1B",
"type")]
xdatadeg <- apply(as.matrix(few[,1:6]),2,as.numeric)
xdatadeg <- t(xdatadeg)
group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
# plotMDS(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)
# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit,coef=2)
# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient: grouptumor
## logFC logCPM F PValue FDR
## LOC283663 -1.1438086 15.16075 62.409945 2.305938e-14 1.383563e-13
## GJA3 1.7383317 13.14146 23.949152 1.395968e-06 4.187905e-06
## HOTAIR 1.6202151 12.89395 10.495759 1.288313e-03 2.576627e-03
## NELF 0.1682085 19.93805 5.095770 2.447962e-02 3.671944e-02
## DNAI2 -0.6031510 10.22343 4.674232 3.116320e-02 3.739584e-02
## GUCA1B -0.2152865 13.39286 1.704419 1.924014e-01 1.924014e-01
summary(decideTests(qlf))
## grouptumor
## Down 2
## NotSig 1
## Up 3
decideTests(qlf)
## TestResults matrix
## grouptumor
## HOTAIR 1
## GJA3 1
## LOC283663 -1
## DNAI2 -1
## NELF 1
## GUCA1B 0
qlf$table
## logFC logCPM F PValue
## HOTAIR 1.6202151 12.89395 10.495759 1.288313e-03
## GJA3 1.7383317 13.14146 23.949152 1.395968e-06
## LOC283663 -1.1438086 15.16075 62.409945 2.305938e-14
## DNAI2 -0.6031510 10.22343 4.674232 3.116320e-02
## NELF 0.1682085 19.93805 5.095770 2.447962e-02
## GUCA1B -0.2152865 13.39286 1.704419 1.924014e-01
plotMD(qlf)
abline(h=c(-1, 1), col="blue")
## Tumor vs. normal genes selected by HUB
# tumor 373
# normal 60
few <- all[,c("CYGB", "UNC13B", "LIPT2", "RFT1", "BEND4", "FAM24B", "RASGRP2", "SLFN11",
"type")]
xdatadeg <- apply(as.matrix(few[,1:8]),2,as.numeric)
xdatadeg <- t(xdatadeg)
group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
# plotMDS(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)
# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit,coef=2)
# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient: grouptumor
## logFC logCPM F PValue FDR
## BEND4 -2.23226211 11.17524 178.4118594 2.161949e-34 1.729559e-33
## RASGRP2 -1.50978636 14.27713 100.7729807 1.739134e-21 6.956534e-21
## RFT1 0.85082359 17.73505 97.8953741 5.687952e-21 1.516787e-20
## FAM24B 1.31414753 13.51713 46.2870925 3.356109e-11 6.712218e-11
## LIPT2 0.64489476 13.29540 34.3161182 9.196043e-09 1.471367e-08
## CYGB -0.31693896 17.00330 9.1675519 2.608502e-03 3.478002e-03
## UNC13B 0.05581413 19.03481 0.5044560 4.779256e-01 5.462007e-01
## SLFN11 0.06989973 16.59031 0.2387515 6.253532e-01 6.253532e-01
summary(decideTests(qlf))
## grouptumor
## Down 3
## NotSig 2
## Up 3
decideTests(qlf)
## TestResults matrix
## grouptumor
## CYGB -1
## UNC13B 0
## LIPT2 1
## RFT1 1
## BEND4 -1
## FAM24B 1
## RASGRP2 -1
## SLFN11 0
qlf$table
## logFC logCPM F PValue
## CYGB -0.31693896 17.00330 9.1675519 2.608502e-03
## UNC13B 0.05581413 19.03481 0.5044560 4.779256e-01
## LIPT2 0.64489476 13.29540 34.3161182 9.196043e-09
## RFT1 0.85082359 17.73505 97.8953741 5.687952e-21
## BEND4 -2.23226211 11.17524 178.4118594 2.161949e-34
## FAM24B 1.31414753 13.51713 46.2870925 3.356109e-11
## RASGRP2 -1.50978636 14.27713 100.7729807 1.739134e-21
## SLFN11 0.06989973 16.59031 0.2387515 6.253532e-01
plotMD(qlf)
abline(h=c(-1, 1), col="blue")
## Tumor vs. normal genes selected by all models + TCox + EN + HUB
# tumor 373
# normal 60
few <- all[,c("CYP7A1",
"FAM159A",
"LOC732275",
"CLDN9",
"LBX2",
"MEIG1",
"PAX5",
"NKAIN4",
"ZDHHC19",
"GRAPL",
"PCDHB12",
"EEPD1",
"HPCAL1",
"PGAM2",
"ZNF883",
"FAM138B",
"LOC646498",
"PRCD",
"ANKRD26P1",
"CARKD",
"IGLON5",
"OSTN",
"RAB20",
"TXNL4B",
"AOX2P",
"DCLK3",
"FCRL2",
"SEPT7P2",
"ASPHD1",
"COL19A1",
"DCP1A",
"FLJ16779",
"LOC100303728",
"PCDHA7",
"SNTG1",
"COX4I2",
"NXF2B",
"TAC3",
"C20orf106",
"LOC285780",
"OR2T5",
"TERF2IP",
"CAPN7",
"OSBPL3",
"TRIM67","HOTAIR", "GJA3", "LOC283663", "DNAI2", "NELF", "GUCA1B", "CYGB", "UNC13B", "LIPT2", "RFT1", "BEND4", "FAM24B", "RASGRP2", "SLFN11",
"type")]
xdatadeg <- apply(as.matrix(few[,1:59]),2,as.numeric)
xdatadeg <- t(xdatadeg)
group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
# plotMDS(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)
# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit,coef=2)
# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient: grouptumor
## logFC logCPM F PValue FDR
## COL19A1 -2.8179030 8.595589 225.37179 2.304581e-41 1.359703e-39
## BEND4 -2.7094111 9.211264 212.36306 1.786624e-39 5.270542e-38
## OSBPL3 1.3627386 16.003681 202.04329 6.004244e-38 1.180835e-36
## RASGRP2 -1.9542075 12.288638 155.30003 1.052623e-30 1.552620e-29
## FCRL2 -2.4362515 10.534432 117.35523 2.225992e-24 2.626671e-23
## LOC100303728 -0.9520623 10.116237 112.04371 1.855834e-23 1.824904e-22
## ASPHD1 1.9357883 13.402640 106.51526 1.726223e-22 1.454959e-21
## PRCD -1.0156103 8.964546 100.15023 2.317828e-21 1.709398e-20
## CYGB -0.8826620 14.901394 74.53855 1.126285e-16 7.383422e-16
## LBX2 1.0930513 9.907015 50.89707 4.081197e-12 2.209211e-11
summary(decideTests(qlf))
## grouptumor
## Down 23
## NotSig 20
## Up 16
decideTests(qlf)
## TestResults matrix
## grouptumor
## CYP7A1 -1
## FAM159A -1
## LOC732275 0
## CLDN9 1
## LBX2 1
## 54 more rows ...
qlf$table
## logFC logCPM F PValue
## CYP7A1 -1.63334908 7.921844 32.16798004 2.578170e-08
## FAM159A -1.09238629 8.819147 39.31712906 8.676121e-10
## LOC732275 -0.05423451 7.178948 0.37410537 8.073985e-01
## CLDN9 1.30057881 11.480171 25.95448245 5.214578e-07
## LBX2 1.09305129 9.907015 50.89707239 4.081197e-12
## MEIG1 0.14003274 7.587517 0.42857904 5.645820e-01
## PAX5 -1.93373481 10.738603 48.27667935 1.353926e-11
## NKAIN4 -0.25267343 9.247261 1.06093696 3.035725e-01
## ZDHHC19 -1.37161655 7.746099 50.87693263 4.118867e-12
## GRAPL -0.11850276 8.810476 0.40196820 5.264069e-01
## PCDHB12 -0.50691866 10.817809 11.48211297 7.665154e-04
## EEPD1 0.71117278 15.729398 43.28256382 1.360090e-10
## HPCAL1 -0.02031115 16.800375 0.05475514 8.150962e-01
## PGAM2 -0.52915848 10.397784 17.09801459 4.259255e-05
## ZNF883 -0.03532704 10.150584 0.01805584 8.931704e-01
## FAM138B -0.87815262 7.205567 41.76825770 1.429707e-03
## LOC646498 -0.12450154 7.169595 4.30151902 5.269022e-01
## PRCD -1.01561035 8.964546 100.15023322 2.317828e-21
## ANKRD26P1 0.80527058 7.293802 11.42680716 6.184645e-03
## CARKD -0.08456063 16.510355 1.46748347 2.263991e-01
## IGLON5 0.31485549 9.668320 1.92433440 1.660881e-01
## OSTN -1.04005630 7.179727 168.37847644 6.760930e-06
## RAB20 -0.09936022 16.098856 1.66212643 1.979998e-01
## TXNL4B 0.20259027 14.477214 8.61151296 3.516813e-03
## AOX2P 0.14974118 7.210911 0.93978526 6.042511e-01
## DCLK3 0.90512959 10.136943 33.43675229 1.405100e-08
## FCRL2 -2.43625149 10.534432 117.35523476 2.225992e-24
## SEPT7P2 0.26240262 13.022431 12.21257985 5.232448e-04
## ASPHD1 1.93578827 13.402640 106.51526030 1.726223e-22
## COL19A1 -2.81790297 8.595589 225.37178995 2.304581e-41
## DCP1A 0.07496035 15.864870 1.21140171 2.716618e-01
## FLJ16779 1.34530173 9.928667 21.26377598 5.266133e-06
## LOC100303728 -0.95206229 10.116237 112.04371068 1.855834e-23
## PCDHA7 -0.45736970 9.075902 4.63035858 3.195931e-02
## SNTG1 -0.28313781 7.186221 3.21741533 3.587198e-01
## COX4I2 -0.05418245 9.688488 0.13636363 7.121024e-01
## NXF2B 1.75559879 7.383321 22.81717325 2.437667e-06
## TAC3 -0.87441618 9.987477 18.26749338 2.359190e-05
## C20orf106 0.03835324 7.657908 0.04571520 8.307943e-01
## LOC285780 -0.91060992 7.250830 27.54037455 1.003767e-03
## OR2T5 0.17011766 7.181836 2.84015052 4.455165e-01
## TERF2IP -0.16399165 16.183656 10.63005518 1.199856e-03
## CAPN7 -0.11227073 15.530615 3.23536145 7.275568e-02
## OSBPL3 1.36273864 16.003681 202.04329084 6.004244e-38
## TRIM67 -0.62128077 8.934436 17.10274433 4.249062e-05
## HOTAIR 2.22595482 9.930021 20.66461442 7.096895e-06
## GJA3 2.26569881 10.076348 43.94657962 9.991203e-11
## LOC283663 -0.70462996 12.043082 28.10877140 1.826031e-07
## DNAI2 -0.22683552 7.371323 0.82030724 4.191321e-01
## NELF 0.77260219 17.089483 42.42859172 2.023825e-10
## GUCA1B 0.29896656 10.380007 3.47556637 6.295300e-02
## CYGB -0.88266200 14.901394 74.53854865 1.126285e-16
## UNC13B -0.44640654 16.957731 22.81745477 2.437328e-06
## LIPT2 0.11119801 11.132369 1.21413377 2.711227e-01
## RFT1 0.35694181 15.549585 22.91808355 2.319064e-06
## BEND4 -2.70941108 9.211264 212.36305755 1.786624e-39
## FAM24B 0.81794341 11.312068 20.17526923 9.060076e-06
## RASGRP2 -1.95420751 12.288638 155.30002946 1.052623e-30
## SLFN11 -0.44088416 14.516353 8.80868002 3.163082e-03
plotMD(qlf)
abline(h=c(-1, 1), col="blue")
# Survival with genes selected
ANKRD26P1
tumorf <- as.data.frame(t(tumorf))
gene <- as.data.frame(tumorf$ANKRD26P1)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$ANKRD26P1` <= median(gene$`tumorf$ANKRD26P1`)] <- "low"
gene$type[gene$`tumorf$ANKRD26P1` > median(gene$`tumorf$ANKRD26P1`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE, title = "ANKRD26P1")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 97 20 19.3 0.0275 0.0379
## path_stage=low 260 52 52.7 0.0101 0.0379
##
## Chisq= 0 on 1 degrees of freedom, p= 0.8
CARKD
gene <- as.data.frame(tumorf$CARKD)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$CARKD` <= median(gene$`tumorf$CARKD`)] <- "low"
gene$type[gene$`tumorf$CARKD` > median(gene$`tumorf$CARKD`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "CARKD")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 181 39 36.3 0.195 0.394
## path_stage=low 176 33 35.7 0.199 0.394
##
## Chisq= 0.4 on 1 degrees of freedom, p= 0.5
IGLON5
gene <- as.data.frame(tumorf$IGLON5)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$IGLON5` <= median(gene$`tumorf$IGLON5`)] <- "low"
gene$type[gene$`tumorf$IGLON5` > median(gene$`tumorf$IGLON5`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "IGLON5")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 182 41 31.8 2.66 4.81
## path_stage=low 175 31 40.2 2.11 4.81
##
## Chisq= 4.8 on 1 degrees of freedom, p= 0.03
OSTN
gene <- as.data.frame(tumorf$OSTN)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$OSTN` <= median(gene$`tumorf$OSTN`)] <- "low"
gene$type[gene$`tumorf$OSTN` > median(gene$`tumorf$OSTN`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "OSTN")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 8 5 0.692 26.79 27.3
## path_stage=low 349 67 71.308 0.26 27.3
##
## Chisq= 27.3 on 1 degrees of freedom, p= 2e-07
RAB20
gene <- as.data.frame(tumorf$RAB20)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$RAB20` <= median(gene$`tumorf$RAB20`)] <- "low"
gene$type[gene$`tumorf$RAB20` > median(gene$`tumorf$RAB20`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "RAB20")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 181 39 36.8 0.129 0.265
## path_stage=low 176 33 35.2 0.135 0.265
##
## Chisq= 0.3 on 1 degrees of freedom, p= 0.6
TXNL4B
gene <- as.data.frame(tumorf$TXNL4B)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$TXNL4B` <= median(gene$`tumorf$TXNL4B`)] <- "low"
gene$type[gene$`tumorf$TXNL4B` > median(gene$`tumorf$TXNL4B`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TXNL4B")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179 40 37.9 0.120 0.255
## path_stage=low 178 32 34.1 0.133 0.255
##
## Chisq= 0.3 on 1 degrees of freedom, p= 0.6
AOX2P
gene <- as.data.frame(tumorf$AOX2P)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$AOX2P` <= median(gene$`tumorf$AOX2P`)] <- "low"
gene$type[gene$`tumorf$AOX2P` > median(gene$`tumorf$AOX2P`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "AOX2P")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 47 11 7.03 2.247 2.53
## path_stage=low 310 61 64.97 0.243 2.53
##
## Chisq= 2.5 on 1 degrees of freedom, p= 0.1
DCLK3
gene <- as.data.frame(tumorf$DCLK3)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$DCLK3` <= median(gene$`tumorf$DCLK3`)] <- "low"
gene$type[gene$`tumorf$DCLK3` > median(gene$`tumorf$DCLK3`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "DCLK3")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 177 39 35 0.451 0.881
## path_stage=low 180 33 37 0.427 0.881
##
## Chisq= 0.9 on 1 degrees of freedom, p= 0.3
FCRL2
gene <- as.data.frame(tumorf$FCRL2)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$FCRL2` <= median(gene$`tumorf$FCRL2`)] <- "low"
gene$type[gene$`tumorf$FCRL2` > median(gene$`tumorf$FCRL2`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "FCRL2")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 182 31 32.2 0.0430 0.0787
## path_stage=low 175 41 39.8 0.0347 0.0787
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.8
SEPT7P2
gene <- as.data.frame(tumorf$SEPT7P2)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$SEPT7P2` <= median(gene$`tumorf$SEPT7P2`)] <- "low"
gene$type[gene$`tumorf$SEPT7P2` > median(gene$`tumorf$SEPT7P2`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "SEPT7P2")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 177 43 35.5 1.56 3.12
## path_stage=low 180 29 36.5 1.52 3.12
##
## Chisq= 3.1 on 1 degrees of freedom, p= 0.08
ASPHD1
gene <- as.data.frame(tumorf$ASPHD1)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$ASPHD1` <= median(gene$`tumorf$ASPHD1`)] <- "low"
gene$type[gene$`tumorf$ASPHD1` > median(gene$`tumorf$ASPHD1`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "ASPHD1")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 178 43 35.2 1.72 3.38
## path_stage=low 179 29 36.8 1.65 3.38
##
## Chisq= 3.4 on 1 degrees of freedom, p= 0.07
COL19A1
gene <- as.data.frame(tumorf$COL19A1)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$COL19A1` <= median(gene$`tumorf$COL19A1`)] <- "low"
gene$type[gene$`tumorf$COL19A1` > median(gene$`tumorf$COL19A1`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "COL19A1")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 182 30 32.5 0.193 0.357
## path_stage=low 175 42 39.5 0.159 0.357
##
## Chisq= 0.4 on 1 degrees of freedom, p= 0.6
DCP1A
gene <- as.data.frame(tumorf$DCP1A)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$DCP1A` <= median(gene$`tumorf$DCP1A`)] <- "low"
gene$type[gene$`tumorf$DCP1A` > median(gene$`tumorf$DCP1A`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "DCP1A")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 175 27 35.1 1.89 3.71
## path_stage=low 182 45 36.9 1.80 3.71
##
## Chisq= 3.7 on 1 degrees of freedom, p= 0.05
FLJ16779
gene <- as.data.frame(tumorf$FLJ16779)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$FLJ16779` <= median(gene$`tumorf$FLJ16779`)] <- "low"
gene$type[gene$`tumorf$FLJ16779` > median(gene$`tumorf$FLJ16779`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "FLJ16779")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 177 41 32 2.54 4.6
## path_stage=low 180 31 40 2.03 4.6
##
## Chisq= 4.6 on 1 degrees of freedom, p= 0.03
LOC100303728
gene <- as.data.frame(tumorf$LOC100303728)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$LOC100303728` <= median(gene$`tumorf$LOC100303728`)] <- "low"
gene$type[gene$`tumorf$LOC100303728` > median(gene$`tumorf$LOC100303728`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "LOC100303728")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 178 47 37.9 2.17 4.6
## path_stage=low 179 25 34.1 2.41 4.6
##
## Chisq= 4.6 on 1 degrees of freedom, p= 0.03
PCDHA7
gene <- as.data.frame(tumorf$PCDHA7)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$PCDHA7` <= median(gene$`tumorf$PCDHA7`)] <- "low"
gene$type[gene$`tumorf$PCDHA7` > median(gene$`tumorf$PCDHA7`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "PCDHA7")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179 38 34.9 0.268 0.522
## path_stage=low 178 34 37.1 0.253 0.522
##
## Chisq= 0.5 on 1 degrees of freedom, p= 0.5
SNTG1
gene <- as.data.frame(tumorf$SNTG1)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$SNTG1` <= median(gene$`tumorf$SNTG1`)] <- "low"
gene$type[gene$`tumorf$SNTG1` > median(gene$`tumorf$SNTG1`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "SNTG1")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 7 3 0.702 7.5217 7.65
## path_stage=low 350 69 71.298 0.0741 7.65
##
## Chisq= 7.6 on 1 degrees of freedom, p= 0.006
COX4I2
gene <- as.data.frame(tumorf$COX4I2)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$COX4I2` <= median(gene$`tumorf$COX4I2`)] <- "low"
gene$type[gene$`tumorf$COX4I2` > median(gene$`tumorf$COX4I2`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "COX4I2")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179 44 31.7 4.75 8.57
## path_stage=low 178 28 40.3 3.74 8.57
##
## Chisq= 8.6 on 1 degrees of freedom, p= 0.003
NXF2B
gene <- as.data.frame(tumorf$NXF2B)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$NXF2B` <= median(gene$`tumorf$NXF2B`)] <- "low"
gene$type[gene$`tumorf$NXF2B` > median(gene$`tumorf$NXF2B`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "NXF2B")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 36 10 6.64 1.695 1.88
## path_stage=low 321 62 65.36 0.172 1.88
##
## Chisq= 1.9 on 1 degrees of freedom, p= 0.2
TAC3
gene <- as.data.frame(tumorf$TAC3)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$TAC3` <= median(gene$`tumorf$TAC3`)] <- "low"
gene$type[gene$`tumorf$TAC3` > median(gene$`tumorf$TAC3`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TAC3")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 176 37 37.2 0.00104 0.00215
## path_stage=low 181 35 34.8 0.00111 0.00215
##
## Chisq= 0 on 1 degrees of freedom, p= 1
C20orf106
gene <- as.data.frame(tumorf$C20orf106)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$C20orf106` <= median(gene$`tumorf$C20orf106`)] <- "low"
gene$type[gene$`tumorf$C20orf106` > median(gene$`tumorf$C20orf106`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "C20orf106")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 175 41 30.5 3.62 6.38
## path_stage=low 182 31 41.5 2.66 6.38
##
## Chisq= 6.4 on 1 degrees of freedom, p= 0.01
LOC285780
gene <- as.data.frame(tumorf$LOC285780)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$LOC285780` <= median(gene$`tumorf$LOC285780`)] <- "low"
gene$type[gene$`tumorf$LOC285780` > median(gene$`tumorf$LOC285780`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "LOC285780")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 65 18 11.3 3.930 4.68
## path_stage=low 292 54 60.7 0.734 4.68
##
## Chisq= 4.7 on 1 degrees of freedom, p= 0.03
OR2T5
gene <- as.data.frame(tumorf$OR2T5)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$OR2T5` <= median(gene$`tumorf$OR2T5`)] <- "low"
gene$type[gene$`tumorf$OR2T5` > median(gene$`tumorf$OR2T5`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "OR2T5")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 15 5 2.32 3.107 3.24
## path_stage=low 342 67 69.68 0.103 3.24
##
## Chisq= 3.2 on 1 degrees of freedom, p= 0.07
TERF2IP
gene <- as.data.frame(tumorf$TERF2IP)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$TERF2IP` <= median(gene$`tumorf$TERF2IP`)] <- "low"
gene$type[gene$`tumorf$TERF2IP` > median(gene$`tumorf$TERF2IP`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TERF2IP")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 181 45 34.5 3.18 6.12
## path_stage=low 176 27 37.5 2.93 6.12
##
## Chisq= 6.1 on 1 degrees of freedom, p= 0.01
CAPN7
gene <- as.data.frame(tumorf$CAPN7)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$CAPN7` <= median(gene$`tumorf$CAPN7`)] <- "low"
gene$type[gene$`tumorf$CAPN7` > median(gene$`tumorf$CAPN7`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "CAPN7")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 178 27 33.5 1.25 2.35
## path_stage=low 179 45 38.5 1.09 2.35
##
## Chisq= 2.4 on 1 degrees of freedom, p= 0.1
OSBPL3
gene <- as.data.frame(tumorf$OSBPL3)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$OSBPL3` <= median(gene$`tumorf$OSBPL3`)] <- "low"
gene$type[gene$`tumorf$OSBPL3` > median(gene$`tumorf$OSBPL3`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "OSBPL3")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 176 43 33.1 2.97 5.52
## path_stage=low 181 29 38.9 2.53 5.52
##
## Chisq= 5.5 on 1 degrees of freedom, p= 0.02
TRIM67
gene <- as.data.frame(tumorf$TRIM67)
gene$rownames <- rownames(tumorf)
gene$type[gene$`tumorf$TRIM67` <= median(gene$`tumorf$TRIM67`)] <- "low"
gene$type[gene$`tumorf$TRIM67` > median(gene$`tumorf$TRIM67`)] <- "high"
rownames(gene) <- gene$rownames
pathology_stage <- merge(datasurvf, gene, by="row.names")
time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type
# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv
# +did not died
# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)
plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TRIM671")
# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179 42 36.5 0.826 1.68
## path_stage=low 178 30 35.5 0.849 1.68
##
## Chisq= 1.7 on 1 degrees of freedom, p= 0.2